@@ -1343,108 +1343,7 @@ def str_pad(arr, width, side='left', fillchar=' '):
13431343
13441344
13451345def str_split (arr , pat = None , n = None ):
1346- """
1347- Split strings around given separator/delimiter.
1348-
1349- Split each string in the caller's values by given
1350- pattern, propagating NaN values. Equivalent to :meth:`str.split`.
1351-
1352- Parameters
1353- ----------
1354- pat : str, optional
1355- String or regular expression to split on.
1356- If not specified, split on whitespace.
1357- n : int, default -1 (all)
1358- Limit number of splits in output.
1359- ``None``, 0 and -1 will be interpreted as return all splits.
1360- expand : bool, default False
1361- Expand the split strings into separate columns.
1362-
1363- * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
1364- * If ``False``, return Series/Index, containing lists of strings.
13651346
1366- Returns
1367- -------
1368- Series, Index, DataFrame or MultiIndex
1369- Type matches caller unless ``expand=True`` (see Notes).
1370-
1371- Notes
1372- -----
1373- The handling of the `n` keyword depends on the number of found splits:
1374-
1375- - If found splits > `n`, make first `n` splits only
1376- - If found splits <= `n`, make all splits
1377- - If for a certain row the number of found splits < `n`,
1378- append `None` for padding up to `n` if ``expand=True``
1379-
1380- If using ``expand=True``, Series and Index callers return DataFrame and
1381- MultiIndex objects, respectively.
1382-
1383- See Also
1384- --------
1385- str.split : Standard library version of this method.
1386- Series.str.get_dummies : Split each string into dummy variables.
1387- Series.str.partition : Split string on a separator, returning
1388- the before, separator, and after components.
1389-
1390- Examples
1391- --------
1392- >>> s = pd.Series(["this is good text", "but this is even better"])
1393-
1394- By default, split will return an object of the same size
1395- having lists containing the split elements
1396-
1397- >>> s.str.split()
1398- 0 [this, is, good, text]
1399- 1 [but, this, is, even, better]
1400- dtype: object
1401- >>> s.str.split("random")
1402- 0 [this is good text]
1403- 1 [but this is even better]
1404- dtype: object
1405-
1406- When using ``expand=True``, the split elements will expand out into
1407- separate columns.
1408-
1409- For Series object, output return type is DataFrame.
1410-
1411- >>> s.str.split(expand=True)
1412- 0 1 2 3 4
1413- 0 this is good text None
1414- 1 but this is even better
1415- >>> s.str.split(" is ", expand=True)
1416- 0 1
1417- 0 this good text
1418- 1 but this even better
1419-
1420- For Index object, output return type is MultiIndex.
1421-
1422- >>> i = pd.Index(["ba 100 001", "ba 101 002", "ba 102 003"])
1423- >>> i.str.split(expand=True)
1424- MultiIndex(levels=[['ba'], ['100', '101', '102'], ['001', '002', '003']],
1425- labels=[[0, 0, 0], [0, 1, 2], [0, 1, 2]])
1426-
1427- Parameter `n` can be used to limit the number of splits in the output.
1428-
1429- >>> s.str.split("is", n=1)
1430- 0 [th, is good text]
1431- 1 [but th, is even better]
1432- dtype: object
1433- >>> s.str.split("is", n=1, expand=True)
1434- 0 1
1435- 0 th is good text
1436- 1 but th is even better
1437-
1438- If NaN is present, it is propagated throughout the columns
1439- during the split.
1440-
1441- >>> s = pd.Series(["this is good text", "but this is even better", np.nan])
1442- >>> s.str.split(n=3, expand=True)
1443- 0 1 2 3
1444- 0 this is good text
1445- 1 but this is even better
1446- 2 NaN NaN NaN NaN
1447- """
14481347 if pat is None :
14491348 if n is None or n == 0 :
14501349 n = - 1
@@ -1464,25 +1363,7 @@ def str_split(arr, pat=None, n=None):
14641363
14651364
14661365def str_rsplit (arr , pat = None , n = None ):
1467- """
1468- Split each string in the Series/Index by the given delimiter
1469- string, starting at the end of the string and working to the front.
1470- Equivalent to :meth:`str.rsplit`.
14711366
1472- Parameters
1473- ----------
1474- pat : string, default None
1475- Separator to split on. If None, splits on whitespace
1476- n : int, default -1 (all)
1477- None, 0 and -1 will be interpreted as return all splits
1478- expand : bool, default False
1479- * If True, return DataFrame/MultiIndex expanding dimensionality.
1480- * If False, return Series/Index.
1481-
1482- Returns
1483- -------
1484- split : Series/Index or DataFrame/MultiIndex of objects
1485- """
14861367 if n is None or n == 0 :
14871368 n = - 1
14881369 f = lambda x : x .rsplit (pat , n )
@@ -2325,12 +2206,133 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
23252206 res = Series (res , index = data .index , name = self ._orig .name )
23262207 return res
23272208
2328- @copy (str_split )
2209+ _shared_docs ['str_split' ] = ("""
2210+ Split strings around given separator/delimiter.
2211+
2212+ Splits the string in the Series/Index from the %(side)s,
2213+ at the specified delimiter string. Equivalent to :meth:`str.%(method)s`.
2214+
2215+ Parameters
2216+ ----------
2217+ pat : str, optional
2218+ String or regular expression to split on.
2219+ If not specified, split on whitespace.
2220+ n : int, default -1 (all)
2221+ Limit number of splits in output.
2222+ ``None``, 0 and -1 will be interpreted as return all splits.
2223+ expand : bool, default False
2224+ Expand the split strings into separate columns.
2225+
2226+ * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
2227+ * If ``False``, return Series/Index, containing lists of strings.
2228+
2229+ Returns
2230+ -------
2231+ Series, Index, DataFrame or MultiIndex
2232+ Type matches caller unless ``expand=True`` (see Notes).
2233+
2234+ See Also
2235+ --------
2236+ Series.str.split : Split strings around given separator/delimiter.
2237+ Series.str.rsplit : Splits string around given separator/delimiter,
2238+ starting from the right.
2239+ Series.str.join : Join lists contained as elements in the Series/Index
2240+ with passed delimiter.
2241+ str.split : Standard library version for split.
2242+ str.rsplit : Standard library version for rsplit.
2243+
2244+ Notes
2245+ -----
2246+ The handling of the `n` keyword depends on the number of found splits:
2247+
2248+ - If found splits > `n`, make first `n` splits only
2249+ - If found splits <= `n`, make all splits
2250+ - If for a certain row the number of found splits < `n`,
2251+ append `None` for padding up to `n` if ``expand=True``
2252+
2253+ If using ``expand=True``, Series and Index callers return DataFrame and
2254+ MultiIndex objects, respectively.
2255+
2256+ Examples
2257+ --------
2258+ >>> s = pd.Series(["this is a regular sentence",
2259+ ... "https://docs.python.org/3/tutorial/index.html", np.nan])
2260+
2261+ In the default setting, the string is split by whitespace.
2262+
2263+ >>> s.str.split()
2264+ 0 [this, is, a, regular, sentence]
2265+ 1 [https://docs.python.org/3/tutorial/index.html]
2266+ 2 NaN
2267+ dtype: object
2268+
2269+ Without the `n` parameter, the outputs of `rsplit` and `split`
2270+ are identical.
2271+
2272+ >>> s.str.rsplit()
2273+ 0 [this, is, a, regular, sentence]
2274+ 1 [https://docs.python.org/3/tutorial/index.html]
2275+ 2 NaN
2276+ dtype: object
2277+
2278+ The `n` parameter can be used to limit the number of splits on the
2279+ delimiter. The outputs of `split` and `rsplit` are different.
2280+
2281+ >>> s.str.split(n=2)
2282+ 0 [this, is, a regular sentence]
2283+ 1 [https://docs.python.org/3/tutorial/index.html]
2284+ 2 NaN
2285+ dtype: object
2286+
2287+ >>> s.str.rsplit(n=2)
2288+ 0 [this is a, regular, sentence]
2289+ 1 [https://docs.python.org/3/tutorial/index.html]
2290+ 2 NaN
2291+ dtype: object
2292+
2293+ The `pat` parameter can be used to split by other characters.
2294+
2295+ >>> s.str.split(pat="/")
2296+ 0 [this is a regular sentence]
2297+ 1 [https:, , docs.python.org, 3, tutorial, index...
2298+ 2 NaN
2299+ dtype: object
2300+
2301+ When using ``expand=True``, the split elements will expand out into
2302+ separate columns. If NaN is present, it is propagated throughout
2303+ the columns during the split.
2304+
2305+ >>> s.str.split(expand=True)
2306+ 0 1 2 3
2307+ 0 this is a regular
2308+ 1 https://docs.python.org/3/tutorial/index.html None None None
2309+ 2 NaN NaN NaN NaN \\
2310+
2311+ 4
2312+ 0 sentence
2313+ 1 None
2314+ 2 NaN
2315+
2316+ For slightly more complex use cases like splitting the HTML document name
2317+ from a URL, a combination of parameter settings can be used.
2318+
2319+ >>> s.str.rsplit("/", n=1, expand=True)
2320+ 0 1
2321+ 0 this is a regular sentence None
2322+ 1 https://docs.python.org/3/tutorial index.html
2323+ 2 NaN NaN
2324+ """ )
2325+
2326+ @Appender (_shared_docs ['str_split' ] % {
2327+ 'side' : 'beginning' ,
2328+ 'method' : 'split' })
23292329 def split (self , pat = None , n = - 1 , expand = False ):
23302330 result = str_split (self ._data , pat , n = n )
23312331 return self ._wrap_result (result , expand = expand )
23322332
2333- @copy (str_rsplit )
2333+ @Appender (_shared_docs ['str_split' ] % {
2334+ 'side' : 'end' ,
2335+ 'method' : 'rsplit' })
23342336 def rsplit (self , pat = None , n = - 1 , expand = False ):
23352337 result = str_rsplit (self ._data , pat , n = n )
23362338 return self ._wrap_result (result , expand = expand )
0 commit comments