@@ -1346,104 +1346,20 @@ def str_split(arr, pat=None, n=None):
13461346 """
13471347 Split strings around given separator/delimiter.
13481348
1349- Split each string in the caller's values by given
1350- pattern, propagating NaN values. Equivalent to :meth:`str.split`.
1351-
13521349 Parameters
13531350 ----------
13541351 pat : str, optional
1355- String or regular expression to split on.
1356- If not specified, split on whitespace.
1352+ String or regular expression to split on; If not specified,
1353+ split on whitespace.
13571354 n : int, default -1 (all)
1358- Limit number of splits in output.
1359- ``None``, 0 and -1 will be interpreted as return all splits.
1355+ Limit number of splits in output; ``None``, 0 and -1 will
1356+ be interpreted as return all splits.
13601357 expand : bool, default False
13611358 Expand the splitted strings into separate columns.
13621359
1363- * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
1364- * If ``False``, return Series/Index, containing lists of strings.
1365-
13661360 Returns
13671361 -------
1368- Series, Index, DataFrame or MultiIndex
1369- Type matches caller unless ``expand=True`` (see Notes).
1370-
1371- Notes
1372- -----
1373- The handling of the `n` keyword depends on the number of found splits:
1374-
1375- - If found splits > `n`, make first `n` splits only
1376- - If found splits <= `n`, make all splits
1377- - If for a certain row the number of found splits < `n`,
1378- append `None` for padding up to `n` if ``expand=True``
1379-
1380- If using ``expand=True``, Series and Index callers return DataFrame and
1381- MultiIndex objects, respectively.
1382-
1383- See Also
1384- --------
1385- str.split : Standard library version of this method.
1386- Series.str.get_dummies : Split each string into dummy variables.
1387- Series.str.partition : Split string on a separator, returning
1388- the before, separator, and after components.
1389-
1390- Examples
1391- --------
1392- >>> s = pd.Series(["this is good text", "but this is even better"])
1393-
1394- By default, split will return an object of the same size
1395- having lists containing the split elements
1396-
1397- >>> s.str.split()
1398- 0 [this, is, good, text]
1399- 1 [but, this, is, even, better]
1400- dtype: object
1401- >>> s.str.split("random")
1402- 0 [this is good text]
1403- 1 [but this is even better]
1404- dtype: object
1405-
1406- When using ``expand=True``, the split elements will expand out into
1407- separate columns.
1408-
1409- For Series object, output return type is DataFrame.
1410-
1411- >>> s.str.split(expand=True)
1412- 0 1 2 3 4
1413- 0 this is good text None
1414- 1 but this is even better
1415- >>> s.str.split(" is ", expand=True)
1416- 0 1
1417- 0 this good text
1418- 1 but this even better
1419-
1420- For Index object, output return type is MultiIndex.
1421-
1422- >>> i = pd.Index(["ba 100 001", "ba 101 002", "ba 102 003"])
1423- >>> i.str.split(expand=True)
1424- MultiIndex(levels=[['ba'], ['100', '101', '102'], ['001', '002', '003']],
1425- labels=[[0, 0, 0], [0, 1, 2], [0, 1, 2]])
1426-
1427- Parameter `n` can be used to limit the number of splits in the output.
1428-
1429- >>> s.str.split("is", n=1)
1430- 0 [th, is good text]
1431- 1 [but th, is even better]
1432- dtype: object
1433- >>> s.str.split("is", n=1, expand=True)
1434- 0 1
1435- 0 th is good text
1436- 1 but th is even better
1437-
1438- If NaN is present, it is propagated throughout the columns
1439- during the split.
1440-
1441- >>> s = pd.Series(["this is good text", "but this is even better", np.nan])
1442- >>> s.str.split(n=3, expand=True)
1443- 0 1 2 3
1444- 0 this is good text
1445- 1 but this is even better
1446- 2 NaN NaN NaN NaN
1362+ Series, Index, DataFrame or MultiIndex
14471363 """
14481364 if pat is None :
14491365 if n is None or n == 0 :
@@ -1465,16 +1381,13 @@ def str_split(arr, pat=None, n=None):
14651381
14661382def str_rsplit (arr , pat = None , n = None ):
14671383 """
1468- Split strings around given separator/delimiter.
1469-
1470- Returns a list of the words from each string in
1471- Series/Index, separated by the delimiter string
1472- (starting from the right). Equivalent to :meth:`str.rsplit`.
1384+ Split strings around given separator/delimiter (starting from
1385+ the right).
14731386
14741387 Parameters
14751388 ----------
14761389 pat : string, default None
1477- Separator to split on. If None, splits on whitespace.
1390+ Separator to split on; If None, splits on whitespace.
14781391 n : int, default -1 (all)
14791392 None, 0 and -1 will be interpreted as return all splits.
14801393 expand : bool, default False
@@ -1483,54 +1396,7 @@ def str_rsplit(arr, pat=None, n=None):
14831396
14841397 Returns
14851398 -------
1486- Series/Index or DataFrame/MultiIndex of objects
1487-
1488- See Also
1489- --------
1490- str.rsplit : Standard library version of this method.
1491-
1492- Examples
1493- --------
1494- >>> s = pd.Series(["this is good text", "but this is even better"])
1495-
1496- By default, split will return an object of the same size
1497- having lists containing the split elements
1498-
1499- >>> s.str.rsplit()
1500- 0 [this, is, good, text]
1501- 1 [but, this, is, even, better]
1502- dtype: object
1503- >>> s.str.rsplit("random")
1504- 0 [this is good text]
1505- 1 [but this is even better]
1506- dtype: object
1507-
1508- When using ''expand=True'', the split elements will expand out into
1509- separate columns.
1510-
1511- For Series object, output return type is DataFrame.
1512-
1513- >>> s.str.rsplit(expand=True)
1514- 0 1 2 3 4
1515- 0 this is good text None
1516- 1 but this is even better
1517-
1518- Parameter 'n' can be used to limit the number of splits in the output.
1519-
1520- >>> s.str.rsplit("is", n=1)
1521- 0 [this , good text]
1522- 1 [but this , even better]
1523- dtype: object
1524-
1525- If NaN is present, it is propagated throughout the columns
1526- during the split.
1527-
1528- >>> s = pd.Series(["this is good text", "but this is even better", np.nan])
1529- >>> s.str.rsplit(n=3, expand=True)
1530- 0 1 2 3
1531- 0 this is good text
1532- 1 but this is even better
1533- 2 NaN NaN NaN NaN
1399+ Series/Index or DataFrame/MultiIndex of objects
15341400 """
15351401 if n is None or n == 0 :
15361402 n = - 1
@@ -2374,12 +2240,128 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
23742240 res = Series (res , index = data .index , name = self ._orig .name )
23752241 return res
23762242
2377- @copy (str_split )
# Shared docstring template for the ``split`` / ``rsplit`` accessor methods.
# The text is completed with %-formatting against a mapping and uses three
# named placeholders:
#   %(side)s   - which end of the string splitting starts from
#   %(method)s - name of the equivalent built-in ``str`` method
#   %(also)s   - body of the "See Also" section
# Any mapping applied to this template has to provide all three keys;
# a missing key makes the %-formatting raise KeyError.
2243+ _shared_docs ['str_split' ] = ("""
2244+ Split strings around given separator/delimiter.
2245+
2246+ Returns a list of the words from each string in Series/Index,
2247+ split by the given delimiter string, starting at the %(side)s of the
2248+ string. Equivalent to :meth:`str.%(method)s`.
2249+
2250+ Parameters
2251+ ----------
2252+ pat : str, optional
2253+ String or regular expression to split on.
2254+ If not specified, split on whitespace.
2255+ n : int, default -1 (all)
2256+ Limit number of splits in output.
2257+ ``None``, 0 and -1 will be interpreted as return all splits.
2258+ expand : bool, default False
2259+ Expand the splitted strings into separate columns.
2260+
2261+ * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
2262+ * If ``False``, return Series/Index, containing lists of strings.
2263+
2264+ Returns
2265+ -------
2266+ Series, Index, DataFrame or MultiIndex
2267+ Type matches caller unless ``expand=True`` (see Notes).
2268+
2269+ Notes
2270+ -----
2271+ The handling of the `n` keyword depends on the number of found splits:
2272+
2273+ - If found splits > `n`, make first `n` splits only
2274+ - If found splits <= `n`, make all splits
2275+ - If for a certain row the number of found splits < `n`,
2276+ append `None` for padding up to `n` if ``expand=True``
2277+
2278+ If using ``expand=True``, Series and Index callers return DataFrame and
2279+ MultiIndex objects, respectively.
2280+
2281+ See Also
2282+ --------
2283+ %(also)s
2284+
2285+ Examples
2286+ --------
2287+ >>> s = pd.Series(["this is good text", "but this is even better"])
2288+
2289+ By default, split and rsplit will return an object of the same size
2290+ having lists containing the split elements
2291+
2292+ >>> s.str.split()
2293+ 0 [this, is, good, text]
2294+ 1 [but, this, is, even, better]
2295+ dtype: object
2296+
2297+ >>> s.str.rsplit()
2298+ 0 [this, is, good, text]
2299+ 1 [but, this, is, even, better]
2300+ dtype: object
2301+
2302+ >>> s.str.split("random")
2303+ 0 [this is good text]
2304+ 1 [but this is even better]
2305+ dtype: object
2306+
2307+ >>> s.str.rsplit("random")
2308+ 0 [this is good text]
2309+ 1 [but this is even better]
2310+ dtype: object
2311+
2312+ When using ``expand=True``, the split and rsplit elements will expand out into
2313+ separate columns.
2314+
2315+ For Series object, output return type is DataFrame.
2316+
2317+ >>> s.str.split(expand=True)
2318+ 0 1 2 3 4
2319+ 0 this is good text None
2320+ 1 but this is even better
2321+
2322+ >>> s.str.split(" is ", expand=True)
2323+ 0 1
2324+ 0 this good text
2325+ 1 but this even better
2326+
2327+ Parameter `n` can be used to limit the number of splits in the output.
2328+
2329+ >>> s.str.split("is", n=1)
2330+ 0 [th, is good text]
2331+ 1 [but th, is even better]
2332+ dtype: object
2333+
2334+ >>> s.str.rsplit("is", n=1)
2335+ 0 [this , good text]
2336+ 1 [but this , even better]
2337+ dtype: object
2338+
2339+ If NaN is present, it is propagated throughout the columns
2340+ during the split.
2341+
2342+ >>> s = pd.Series(["this is good text", "but this is even better", np.nan])
2343+
2344+ >>> s.str.split(n=3, expand=True)
2345+ 0 1 2 3
2346+ 0 this is good text
2347+ 1 but this is even better
2348+ 2 NaN NaN NaN NaN
2349+
2350+ >>> s.str.rsplit(n=3, expand=True)
2351+ 0 1 2 3
2352+ 0 this is good text
2353+ 1 but this is even better
2354+ 2 NaN NaN NaN NaN
2355+ """ )
2356+
# Build the public docstring from the shared ``str_split`` template.
# The template's "See Also" section is filled through %(also)s, so the
# mapping must carry an ``also`` entry next to ``side`` and ``method``;
# leaving it out makes the %-formatting raise KeyError while the decorator
# expression is being evaluated.
# NOTE(review): the continuation lines inside ``also`` assume the template
# body is indented by four spaces -- confirm against the real template.
@Appender(_shared_docs['str_split'] % dict(
    side='start',
    method='split',
    also=('str.split : Standard library version of this method.\n'
          '    Series.str.get_dummies : Split each string into dummy '
          'variables.\n'
          '    Series.str.partition : Split string on a separator, '
          'returning\n'
          '        the before, separator, and after components.')))
def split(self, pat=None, n=-1, expand=False):
    # No inline docstring on purpose: the decorator above supplies it.
    # Run the element-wise split, then hand the raw result to _wrap_result
    # together with the caller's ``expand`` flag.
    result = str_split(self._data, pat, n=n)
    return self._wrap_result(result, expand=expand)
23812362
2382- @copy (str_rsplit )
# Build the public docstring from the shared ``str_split`` template.
# The template's "See Also" section is filled through %(also)s, so the
# mapping must carry an ``also`` entry next to ``side`` and ``method``;
# leaving it out makes the %-formatting raise KeyError while the decorator
# expression is being evaluated.
@Appender(_shared_docs['str_split'] % dict(
    side='end',
    method='rsplit',
    also='str.rsplit : Standard library version of this method.'))
def rsplit(self, pat=None, n=-1, expand=False):
    # No inline docstring on purpose: the decorator above supplies it.
    # Run the element-wise right-to-left split, then hand the raw result to
    # _wrap_result together with the caller's ``expand`` flag.
    result = str_rsplit(self._data, pat, n=n)
    return self._wrap_result(result, expand=expand)
0 commit comments