123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175 |
- import numpy as np
- import pytest
- from pandas import Series
- from pandas.core import strings as strings
# Sample ``(method_name, args, kwargs)`` triples used to parametrize the
# ``any_string_method`` fixture. A method may appear more than once when
# several argument combinations are exercised.
_any_string_method = [
    ("cat", (), {"sep": ","}),
    ("cat", (Series(list("zyx")),), {"sep": ",", "join": "left"}),
    ("center", (10,), {}),
    ("contains", ("a",), {}),
    ("count", ("a",), {}),
    ("decode", ("UTF-8",), {}),
    ("encode", ("UTF-8",), {}),
    ("endswith", ("a",), {}),
    ("endswith", ("a",), {"na": True}),
    ("endswith", ("a",), {"na": False}),
    ("extract", ("([a-z]*)",), {"expand": False}),
    ("extract", ("([a-z]*)",), {"expand": True}),
    ("extractall", ("([a-z]*)",), {}),
    ("find", ("a",), {}),
    ("findall", ("a",), {}),
    ("get", (0,), {}),
    # because "index" (and "rindex") fail intentionally
    # if the string is not found, search only for empty string
    ("index", ("",), {}),
    ("join", (",",), {}),
    ("ljust", (10,), {}),
    ("match", ("a",), {}),
    ("fullmatch", ("a",), {}),
    ("normalize", ("NFC",), {}),
    ("pad", (10,), {}),
    ("partition", (" ",), {"expand": False}),
    ("partition", (" ",), {"expand": True}),
    ("repeat", (3,), {}),
    ("replace", ("a", "z"), {}),
    ("rfind", ("a",), {}),
    ("rindex", ("",), {}),
    ("rjust", (10,), {}),
    ("rpartition", (" ",), {"expand": False}),
    ("rpartition", (" ",), {"expand": True}),
    ("slice", (0, 1), {}),
    ("slice_replace", (0, 1, "z"), {}),
    ("split", (" ",), {"expand": False}),
    ("split", (" ",), {"expand": True}),
    ("startswith", ("a",), {}),
    ("startswith", ("a",), {"na": True}),
    ("startswith", ("a",), {"na": False}),
    # translating unicode points of "a" to "d"
    ("translate", ({97: 100},), {}),
    ("wrap", (2,), {}),
    ("zfill", (10,), {}),
] + [
    # methods without positional arguments: pair each name with an empty
    # tuple and its *own* empty dict, so no two entries share a mutable
    # kwargs object and the list length is not capped by a magic constant
    (name, (), {})
    for name in [
        "capitalize",
        "cat",
        "get_dummies",
        "isalnum",
        "isalpha",
        "isdecimal",
        "isdigit",
        "islower",
        "isnumeric",
        "isspace",
        "istitle",
        "isupper",
        "len",
        "lower",
        "lstrip",
        "partition",
        "rpartition",
        "rsplit",
        "rstrip",
        "slice",
        "slice_replace",
        "split",
        "strip",
        "swapcase",
        "title",
        "upper",
        "casefold",
    ]
]
ids, _, _ = zip(*_any_string_method)  # use method name as fixture-id
# Guard against forgetting a method: every public attribute name of
# `StringMethods` has to show up in `ids`, otherwise importing this module
# fails on the assertion below.
missing_methods = {
    attr for attr in dir(strings.StringMethods) if not attr.startswith("_")
}.difference(ids)
assert not missing_methods
@pytest.fixture(params=_any_string_method, ids=ids)
def any_string_method(request):
    """
    Fixture parametrized over the public methods of `StringMethods`.

    Each parametrization hands the test one entry of `_any_string_method`
    unchanged: a method name together with sample arguments that are
    sufficient to call that method.

    Returns
    -------
    method_name : str
        The name of the method in `StringMethods`
    args : tuple
        Sample values for the positional arguments
    kwargs : dict
        Sample values for the keyword arguments

    Examples
    --------
    >>> def test_something(any_string_method):
    ...     s = Series(['a', 'b', np.nan, 'd'])
    ...
    ...     method_name, args, kwargs = any_string_method
    ...     method = getattr(s.str, method_name)
    ...     # will not raise
    ...     method(*args, **kwargs)
    """
    method_spec = request.param
    return method_spec
# subset of the full set from pandas/conftest.py
# Each entry pairs an inferred-dtype label with sample values; "empty" is
# listed twice (all-NaN values and a zero-length list).
_any_allowed_skipna_inferred_dtype = [
    ("string", ["a", np.nan, "c"]),
    ("bytes", [b"a", np.nan, b"c"]),
    ("empty", [np.nan, np.nan, np.nan]),
    ("empty", []),
    ("mixed-integer", ["a", np.nan, 2]),
]
# use inferred type as id
ids = tuple(label for label, _values in _any_allowed_skipna_inferred_dtype)
@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids)
def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture for (inferred) dtypes allowed in StringMethods.__init__

    The covered (inferred) types are exactly the labels present in
    `_any_allowed_skipna_inferred_dtype`:

    * 'string'
    * 'bytes'
    * 'empty' (parametrized twice: all-NaN values and a zero-length list)
    * 'mixed-integer'

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        An array of object dtype that will be inferred to have
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    ...
    ...     # constructor for .str-accessor will also pass
    ...     Series(values).str
    """
    inferred_dtype, values = request.param
    # object dtype to avoid casting the sample values to a common type
    values = np.array(values, dtype=object)

    # correctness of inference tested in tests/dtypes/test_inference.py
    return inferred_dtype, values
|