@@ -1380,6 +1380,50 @@ def orientation(self):
13801380 return 'vertical'
13811381
13821382
1383+ _kde_docstring = """
1384+ Generate Kernel Density Estimate plot using Gaussian kernels.
1385+
1386+ In statistics, `kernel density estimation`_ (KDE) is a non-parametric
1387+ way to estimate the probability density function (PDF) of a random
1388+ variable. This function uses Gaussian kernels and includes automatic
1389+ bandwidth determination.
1390+
1391+ .. _kernel density estimation:
1392+ https://en.wikipedia.org/wiki/Kernel_density_estimation
1393+
1394+ Parameters
1395+ ----------
1396+ bw_method : str, scalar or callable, optional
1397+ The method used to calculate the estimator bandwidth. This can be
1398+ 'scott', 'silverman', a scalar constant or a callable.
1399+ If None (default), 'scott' is used.
1400+ See :class:`scipy.stats.gaussian_kde` for more information.
1401+ ind : NumPy array or integer, optional
1402+ Evaluation points for the estimated PDF. If None (default),
1403+ 1000 equally spaced points are used. If `ind` is a NumPy array, the
1404+ KDE is evaluated at the points passed. If `ind` is an integer,
1405+ `ind` number of equally spaced points are used.
1406+ **kwds : optional
1407+ Additional keyword arguments are documented in
1408+ :meth:`pandas.%(this-datatype)s.plot`.
1409+
1410+ Returns
1411+ -------
1412+ axes : matplotlib.AxesSubplot or np.array of them
1413+
1414+ See Also
1415+ --------
1416+ scipy.stats.gaussian_kde : Representation of a kernel-density
1417+ estimate using Gaussian kernels. This is the function used
1418+ internally to estimate the PDF.
1419+ %(sibling-datatype)s.plot.kde : Generate a KDE plot for a
1420+ %(sibling-datatype)s.
1421+
1422+ Examples
1423+ --------
1424+ %(examples)s
1425+ """
1426+
13831427class KdePlot (HistPlot ):
13841428 _kind = 'kde'
13851429 orientation = 'vertical'
@@ -2616,45 +2660,12 @@ def hist(self, bins=10, **kwds):
26162660 """
26172661 return self (kind = 'hist' , bins = bins , ** kwds )
26182662
2619- def kde (self , bw_method = None , ind = None , ** kwds ):
2620- """
2621- Kernel Density Estimate plot using Gaussian kernels.
2622-
2623- In statistics, kernel density estimation (KDE) is a non-parametric way
2624- to estimate the probability density function (PDF) of a random
2625- variable. This function uses Gaussian kernels and includes automatic
2626- bandwith determination.
2627-
2628- Parameters
2629- ----------
2630- bw_method : str, scalar or callable, optional
2631- The method used to calculate the estimator bandwidth. This can be
2632- 'scott', 'silverman', a scalar constant or a callable.
2633- If None (default), 'scott' is used.
2634- See :class:`scipy.stats.gaussian_kde` for more information.
2635- ind : NumPy array or integer, optional
2636- Evaluation points for the estimated PDF. If None (default),
2637- 1000 equally spaced points are used. If `ind` is a NumPy array, the
2638- kde is evaluated at the points passed. If `ind` is an integer,
2639- `ind` number of equally spaced points are used.
2640- kwds : optional
2641- Additional keyword arguments are documented in
2642- :meth:`pandas.Series.plot`.
2643-
2644- Returns
2645- -------
2646- axes : matplotlib.AxesSubplot or np.array of them
2647-
2648- See also
2649- --------
2650- scipy.stats.gaussian_kde : Representation of a kernel-density
2651- estimate using Gaussian kernels. This is the function used
2652- internally to estimate the PDF.
2653-
2654- Examples
2655- --------
2663+ @Appender (_kde_docstring % {
2664+ 'this-datatype' : 'Series' ,
2665+ 'sibling-datatype' : 'DataFrame' ,
2666+ 'examples' : """
26562667 Given a Series of points randomly sampled from an unknown
2657- distribution, estimate this distribution using KDE with automatic
2668+ distribution, estimate its PDF using KDE with automatic
26582669 bandwidth determination and plot the results, evaluating them at
26592670 1000 equally spaced points (default):
26602671
@@ -2664,10 +2675,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
26642675 >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
26652676 >>> ax = s.plot.kde()
26662677
2667-
2668- An scalar fixed bandwidth can be specified. Using a too small bandwidth
2669- can lead to overfitting, while a too large bandwidth can result in
2670- underfitting:
2678+ A scalar bandwidth can be specified. Using a small bandwidth value can
2679+ lead to overfitting, while using a large bandwidth value may result
2680+ in underfitting:
26712681
26722682 .. plot::
26732683 :context: close-figs
@@ -2686,7 +2696,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
26862696 :context: close-figs
26872697
26882698 >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])
2689- """
2699+ """ .strip ()
2700+ })
2701+ def kde (self , bw_method = None , ind = None , ** kwds ):
26902702 return self (kind = 'kde' , bw_method = bw_method , ind = ind , ** kwds )
26912703
26922704 density = kde
@@ -2849,30 +2861,48 @@ def hist(self, by=None, bins=10, **kwds):
28492861 """
28502862 return self (kind = 'hist' , by = by , bins = bins , ** kwds )
28512863
2852- def kde (self , bw_method = None , ind = None , ** kwds ):
2853- """
2854- Kernel Density Estimate plot
2864+ @Appender (_kde_docstring % {
2865+ 'this-datatype' : 'DataFrame' ,
2866+ 'sibling-datatype' : 'Series' ,
2867+ 'examples' : """
2868+ Given several Series of points randomly sampled from unknown
2869+ distributions, estimate their PDFs using KDE with automatic
2870+ bandwidth determination and plot the results, evaluating them at
2871+ 1000 equally spaced points (default):
28552872
2856- Parameters
2857- ----------
2858- bw_method: str, scalar or callable, optional
2859- The method used to calculate the estimator bandwidth. This can be
2860- 'scott', 'silverman', a scalar constant or a callable.
2861- If None (default), 'scott' is used.
2862- See :class:`scipy.stats.gaussian_kde` for more information.
2863- ind : NumPy array or integer, optional
2864- Evaluation points. If None (default), 1000 equally spaced points
2865- are used. If `ind` is a NumPy array, the kde is evaluated at the
2866- points passed. If `ind` is an integer, `ind` number of equally
2867- spaced points are used.
2868- `**kwds` : optional
2869- Additional keyword arguments are documented in
2870- :meth:`pandas.DataFrame.plot`.
2873+ .. plot::
2874+ :context: close-figs
28712875
2872- Returns
2873- -------
2874- axes : matplotlib.AxesSubplot or np.array of them
2875- """
2876+ >>> df = pd.DataFrame({
2877+ ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5],
2878+ ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6],
2879+ ... })
2880+ >>> ax = df.plot.kde()
2881+
2882+ A scalar bandwidth can be specified. Using a small bandwidth value can
2883+ lead to overfitting, while using a large bandwidth value may result
2884+ in underfitting:
2885+
2886+ .. plot::
2887+ :context: close-figs
2888+
2889+ >>> ax = df.plot.kde(bw_method=0.3)
2890+
2891+ .. plot::
2892+ :context: close-figs
2893+
2894+ >>> ax = df.plot.kde(bw_method=3)
2895+
2896+ Finally, the `ind` parameter determines the evaluation points for the
2897+ plot of the estimated PDF:
2898+
2899+ .. plot::
2900+ :context: close-figs
2901+
2902+ >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
2903+ """ .strip ()
2904+ })
2905+ def kde (self , bw_method = None , ind = None , ** kwds ):
28762906 return self (kind = 'kde' , bw_method = bw_method , ind = ind , ** kwds )
28772907
28782908 density = kde
0 commit comments