clipboards.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. """ io on the clipboard """
  2. from io import StringIO
  3. import warnings
  4. from pandas.core.dtypes.generic import ABCDataFrame
  5. from pandas import (
  6. get_option,
  7. option_context,
  8. )
  9. def read_clipboard(sep=r"\s+", **kwargs): # pragma: no cover
  10. r"""
  11. Read text from clipboard and pass to read_csv.
  12. Parameters
  13. ----------
  14. sep : str, default '\s+'
  15. A string or regex delimiter. The default of '\s+' denotes
  16. one or more whitespace characters.
  17. **kwargs
  18. See read_csv for the full argument list.
  19. Returns
  20. -------
  21. DataFrame
  22. A parsed DataFrame object.
  23. """
  24. encoding = kwargs.pop("encoding", "utf-8")
  25. # only utf-8 is valid for passed value because that's what clipboard
  26. # supports
  27. if encoding is not None and encoding.lower().replace("-", "") != "utf8":
  28. raise NotImplementedError("reading from clipboard only supports utf-8 encoding")
  29. from pandas.io.clipboard import clipboard_get
  30. from pandas.io.parsers import read_csv
  31. text = clipboard_get()
  32. # Try to decode (if needed, as "text" might already be a string here).
  33. try:
  34. text = text.decode(kwargs.get("encoding") or get_option("display.encoding"))
  35. except AttributeError:
  36. pass
  37. # Excel copies into clipboard with \t separation
  38. # inspect no more then the 10 first lines, if they
  39. # all contain an equal number (>0) of tabs, infer
  40. # that this came from excel and set 'sep' accordingly
  41. lines = text[:10000].split("\n")[:-1][:10]
  42. # Need to remove leading white space, since read_csv
  43. # accepts:
  44. # a b
  45. # 0 1 2
  46. # 1 3 4
  47. counts = {x.lstrip(" ").count("\t") for x in lines}
  48. if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
  49. sep = "\t"
  50. # check the number of leading tabs in the first line
  51. # to account for index columns
  52. index_length = len(lines[0]) - len(lines[0].lstrip(" \t"))
  53. if index_length != 0:
  54. kwargs.setdefault("index_col", list(range(index_length)))
  55. # Edge case where sep is specified to be None, return to default
  56. if sep is None and kwargs.get("delim_whitespace") is None:
  57. sep = r"\s+"
  58. # Regex separator currently only works with python engine.
  59. # Default to python if separator is multi-character (regex)
  60. if len(sep) > 1 and kwargs.get("engine") is None:
  61. kwargs["engine"] = "python"
  62. elif len(sep) > 1 and kwargs.get("engine") == "c":
  63. warnings.warn(
  64. "read_clipboard with regex separator does not work properly with c engine"
  65. )
  66. return read_csv(StringIO(text), sep=sep, **kwargs)
  67. def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover
  68. """
  69. Attempt to write text representation of object to the system clipboard
  70. The clipboard can be then pasted into Excel for example.
  71. Parameters
  72. ----------
  73. obj : the object to write to the clipboard
  74. excel : bool, defaults to True
  75. if True, use the provided separator, writing in a csv
  76. format for allowing easy pasting into excel.
  77. if False, write a string representation of the object
  78. to the clipboard
  79. sep : optional, defaults to tab
  80. other keywords are passed to to_csv
  81. Notes
  82. -----
  83. Requirements for your platform
  84. - Linux: xclip, or xsel (with PyQt4 modules)
  85. - Windows:
  86. - OS X:
  87. """
  88. encoding = kwargs.pop("encoding", "utf-8")
  89. # testing if an invalid encoding is passed to clipboard
  90. if encoding is not None and encoding.lower().replace("-", "") != "utf8":
  91. raise ValueError("clipboard only supports utf-8 encoding")
  92. from pandas.io.clipboard import clipboard_set
  93. if excel is None:
  94. excel = True
  95. if excel:
  96. try:
  97. if sep is None:
  98. sep = "\t"
  99. buf = StringIO()
  100. # clipboard_set (pyperclip) expects unicode
  101. obj.to_csv(buf, sep=sep, encoding="utf-8", **kwargs)
  102. text = buf.getvalue()
  103. clipboard_set(text)
  104. return
  105. except TypeError:
  106. warnings.warn(
  107. "to_clipboard in excel mode requires a single character separator."
  108. )
  109. elif sep is not None:
  110. warnings.warn("to_clipboard with excel=False ignores the sep argument")
  111. if isinstance(obj, ABCDataFrame):
  112. # str(df) has various unhelpful defaults, like truncation
  113. with option_context("display.max_colwidth", None):
  114. objstr = obj.to_string(**kwargs)
  115. else:
  116. objstr = str(obj)
  117. clipboard_set(objstr)