Skip to content

Commit e9b0e6f

Browse files
committed
wip: update to final Protocol - Histogram only
1 parent e50d605 commit e9b0e6f

File tree

4 files changed

+80
-27
lines changed

4 files changed

+80
-27
lines changed

src/__init__.py

Whitespace-only changes.

src/boost_histogram/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import, division, print_function
33

4-
from ._internal.hist import Histogram, Implementation
4+
from ._internal.hist import Histogram
5+
from ._internal.enum import Kind
56
from . import axis, storage, accumulators, utils, numpy
67
from .tag import loc, rebin, sum, underflow, overflow
78

@@ -37,7 +38,7 @@
3738

3839
__all__ = (
3940
"Histogram",
40-
"Implementation",
41+
"Kind",
4142
"axis",
4243
"storage",
4344
"accumulators",

src/boost_histogram/_internal/enum.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@
1010
Enum = object # type: ignore
1111

1212

13-
class Implementation(str, Enum):
14-
sum = "sum"
15-
mean = "mean"
13+
class Kind(str, Enum):
14+
COUNT = "COUNT"
15+
MEAN = "MEAN"

src/boost_histogram/_internal/hist.py

Lines changed: 74 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import threading
77
import warnings
88

9+
from typing import Optional, Any, Tuple
10+
911
import numpy as np
1012

1113
from .. import _core
@@ -17,7 +19,10 @@
1719
from .storage import Double, Storage
1820
from .utils import cast, register, set_family, MAIN_FAMILY, set_module
1921
from .view import _to_view
20-
from .enum import Implementation
22+
from .enum import Kind
23+
24+
25+
ArrayLike = Any
2126

2227

2328
NOTHING = object()
@@ -207,7 +212,7 @@ def _new_hist(self, _hist, memo=NOTHING):
207212
@property
208213
def ndim(self):
209214
"""
210-
Number of axes (dimensions) of histogram.
215+
Number of axes (dimensions) of the histogram.
211216
"""
212217
return self._hist.rank()
213218

@@ -566,6 +571,7 @@ def reset(self):
566571
return self
567572

568573
def empty(self, flow=False):
574+
# type: (bool) -> bool
569575
"""
570576
Check to see if the histogram has any non-default values.
571577
You can use flow=True to check flow bins too.
@@ -580,6 +586,7 @@ def sum(self, flow=False):
580586

581587
@property
582588
def rank(self):
589+
# type: () -> int
583590
"""
584591
Number of axes (dimensions) of histogram. DEPRECATED, use ndim.
585592
"""
@@ -589,13 +596,15 @@ def rank(self):
589596

590597
@property
591598
def size(self):
599+
# type: () -> int
592600
"""
593601
Total number of bins in the histogram (including underflow/overflow).
594602
"""
595603
return self._hist.size()
596604

597605
@property
598606
def shape(self):
607+
# type: () -> Tuple[int, ...]
599608
"""
600609
Tuple of axis sizes (not including underflow/overflow).
601610
"""
@@ -780,8 +789,9 @@ def __setitem__(self, index, value):
780789
view[tuple(indexes)] = value
781790

782791
def project(self, *args):
792+
# type: (Axis) -> Histogram
783793
"""
784-
Project to a single axis or several axes on a multidiminsional histogram.
794+
Project to a single axis or several axes on a multidimensional histogram.
785795
Provided a list of axis numbers, this will produce the histogram over
786796
those axes only. Flow bins are used if available.
787797
"""
@@ -791,25 +801,31 @@ def project(self, *args):
791801
# Implementation of PlottableHistogram
792802

793803
@property
794-
def implementation(self):
804+
def kind(self):
805+
# type: () -> Kind
795806
"""
796-
Returns "count" if this is a normal summing histogram, and "mean" if this is a
807+
Returns Kind.COUNT if this is a normal summing histogram, and Kind.MEAN if this is a
797808
mean histogram.
798809
799-
:return: Implementation
810+
:return: Kind
800811
"""
801812
if self._storage_type in {_core.storage.mean, _core.storage.weighted_mean}:
802-
return Implementation.mean
813+
return Kind.MEAN
803814
else:
804-
return Implementation.sum
815+
return Kind.COUNT
805816

806817
def values(self, flow=False):
818+
# type: (bool) -> ArrayLike
807819
"""
808-
Return the histogram values (weighed count or samples) for any storage. Avoids a
809-
copy when possible. Is identical to counts for summing histogram, and is the mean
810-
for mean histograms.
820+
Returns the accumulated values. The counts for simple histograms, the
821+
sum of weights for weighted histograms, the mean for profiles, etc.
822+
823+
If counts is equal to 0, the value in that cell is undefined if
824+
kind == "MEAN".
825+
826+
:param flow: Enable flow bins. Not part of PlottableHistogram, but
827+
included for consistency with other methods and flexibility.
811828
812-
:param flow: Enable flow bins. Not part of PlottableHistogram, but included for consitancy and flexibilty.
813829
:return: np.ndarray[np.float64]
814830
"""
815831

@@ -820,35 +836,71 @@ def values(self, flow=False):
820836
return view.value
821837

822838
def variances(self, flow=False):
839+
# type: (bool) -> Optional[ArrayLike]
823840
"""
824-
Return the histogram variance of the value if the storage supports it. Avoids a
825-
copy when possible. Returns values when a storage does not have an explicit variance.
841+
Returns the estimated variance of the accumulated values. The sum of squared
842+
weights for weighted histograms, the variance of samples for profiles, etc.
843+
For an unweighted histogram where kind == "COUNT", this should return the same
844+
as values if the histogram was not filled with weights, and None otherwise.
845+
If counts is equal to 1 or less, the variance in that cell is undefined if
846+
kind == "MEAN".
847+
848+
If kind == "MEAN", the counts can be used to compute the error on the mean
849+
as sqrt(variances / counts), this works whether or not the entries are
850+
weighted if the weight variance was tracked by the implementation.
851+
852+
Currently, this always returns an array, but in the future it will return None
853+
if a weighted fill is made on an unweighted storage.
854+
855+
:param flow: Enable flow bins. Not part of PlottableHistogram, but
856+
included for consistency with other methods and flexibility.
826857
827-
:param flow: Enable flow bins. Not part of PlottableHistogram, but included for
828-
consitancy and flexibilty.
829858
:return: np.ndarray[np.float64]
830859
"""
831860

861+
# TODO: return None if a weighted fill is made on a simple storage.
862+
832863
view = self.view(flow)
833864
if len(view.dtype) == 0:
834-
return None
865+
return view
835866
else:
836867
return view.variance
837868

838869
def counts(self, flow=False):
839-
"""
840-
Return the histogram number of counts. Avoids a copy when possible.
870+
# type: (bool) -> Optional[ArrayLike]
871+
"""
872+
Returns the number of entries in each bin for an unweighted
873+
histogram or profile and an effective number of entries (defined below)
874+
for a weighted histogram or profile. An exotic generalized histogram could
875+
have no sensible .counts, so this is Optional and should be checked by
876+
Consumers.
877+
878+
For a weighted histogram, counts is defined as sum_of_weights ** 2 /
879+
sum_of_weights_squared. It is less than or equal to the number of times
880+
the bin was filled, the equality holds when all filled weights are equal.
881+
The larger the spread in weights, the smaller it is, but it is always 0
882+
if filled 0 times, and 1 if filled once, and more than 1 otherwise.
883+
884+
If kind == "MEAN", counts (effective or not) can and should be used to
885+
determine whether the mean value and its variance should be displayed
886+
(see documentation of values and variances, respectively). The counts
887+
should also be used to compute the error on the mean (see documentation
888+
of variances).
841889
842-
:param flow: Enable flow bins. Not part of PlottableHistogram, but included for
843-
consitancy and flexibilty.
844890
:return: np.ndarray[np.float64]
845891
"""
846892

847893
view = self.view(flow)
894+
848895
if len(view.dtype) == 0:
849896
return view
850897
elif hasattr(view, "sum_of_weights"):
851-
return view.sum_of_weights
898+
return np.divide(
899+
view.sum_of_weights ** 2,
900+
view.sum_of_weights_squared,
901+
out=np.zeros_like(view.sum_of_weights, dtype=np.float64),
902+
where=view.sum_of_weights_squared != 0,
903+
)
852904
elif hasattr(view, "count"):
853905
return view.count
854906
else:

0 commit comments

Comments
 (0)