diff --git a/pymc/model.py b/pymc/model.py
index 185e30644c..4f089e5732 100644
--- a/pymc/model.py
+++ b/pymc/model.py
@@ -60,7 +60,7 @@
 from pymc.distributions import joint_logpt
 from pymc.distributions.logprob import _get_scaling
 from pymc.distributions.transforms import _default_transform
-from pymc.exceptions import ImputationWarning, SamplingError, ShapeError
+from pymc.exceptions import ImputationWarning, SamplingError, ShapeError, ShapeWarning
 from pymc.initial_point import make_initial_point_fn
 from pymc.math import flatten_list
 from pymc.util import (
@@ -1067,12 +1067,6 @@ def add_coord(
             raise ValueError(
                 f"Either `values` or `length` must be specified for the '{name}' dimension."
             )
-        if isinstance(length, int):
-            length = at.constant(length)
-        elif length is not None and not isinstance(length, Variable):
-            raise ValueError(
-                f"The `length` passed for the '{name}' coord must be an Aesara Variable or None."
-            )
         if values is not None:
             # Conversion to a tuple ensures that the coordinate values are immutable.
             # Also, unlike numpy arrays, there's tuple.index(...) which is handy to work with.
@@ -1080,12 +1074,19 @@ def add_coord(
         if name in self.coords:
             if not np.array_equal(values, self.coords[name]):
                 raise ValueError(f"Duplicate and incompatible coordinate: {name}.")
-        else:
+        if length is not None and not isinstance(length, (int, Variable)):
+            raise ValueError(
+                f"The `length` passed for the '{name}' coord must be an int, Aesara Variable or None."
+            )
+        if length is None:
+            length = len(values)
+        if not isinstance(length, Variable):
             if mutable:
-                self._dim_lengths[name] = length or aesara.shared(len(values))
+                length = aesara.shared(length)
             else:
-                self._dim_lengths[name] = length or aesara.tensor.constant(len(values))
-            self._coords[name] = values
+                length = aesara.tensor.constant(length)
+        self._dim_lengths[name] = length
+        self._coords[name] = values
 
     def add_coords(
         self,
@@ -1101,6 +1102,36 @@ def add_coords(
         for name, values in coords.items():
             self.add_coord(name, values, length=lengths.get(name, None))
 
+    def set_dim(self, name: str, new_length: int, coord_values: Optional[Sequence] = None):
+        """Update a mutable dimension.
+
+        Parameters
+        ----------
+        name
+            Name of the dimension.
+        new_length
+            New length of the dimension.
+        coord_values
+            Optional sequence of coordinate values.
+        """
+        if not isinstance(self.dim_lengths[name], ScalarSharedVariable):
+            raise ValueError(f"The dimension '{name}' is immutable.")
+        if coord_values is None and self.coords.get(name, None) is not None:
+            raise ValueError(
+                f"'{name}' has coord values. Pass `set_dim(..., coord_values=...)` to update them."
+            )
+        if coord_values is not None:
+            len_cvals = len(coord_values)
+            if len_cvals != new_length:
+                raise ShapeError(
+                    "Length of new coordinate values does not match the new dimension length.",
+                    actual=len_cvals,
+                    expected=new_length,
+                )
+            self._coords[name] = tuple(coord_values)
+        self.dim_lengths[name].set_value(new_length)
+        return
+
     def set_data(
         self,
         name: str,
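Note: roughly, the new `set_dim` method added above is used as sketched below. This is a minimal sketch distilled from the `test_set_dim` test further down; the `model` handle and the `"mdim"` name are illustrative:

    import pymc as pm

    with pm.Model() as model:
        # A dim registered with mutable=True is backed by a scalar shared variable ...
        model.add_coord("mdim", mutable=True, length=2)
        a = pm.Normal("a", dims="mdim")
        assert a.eval().shape == (2,)

    # ... so it can later be resized on purpose, resizing dependent variables with it.
    model.set_dim("mdim", new_length=3)
    assert a.eval().shape == (3,)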
@@ -1163,16 +1194,40 @@ def set_data(
                     f"{new_length}, so new coord values for the {dname} dimension are required."
                 )
             if isinstance(length_tensor, TensorConstant):
+                # The dimension was fixed in length.
+                # Resizing a data variable in this dimension would
+                # definitely lead to shape problems.
                 raise ShapeError(
                     f"Resizing dimension '{dname}' is impossible, because "
-                    "a 'TensorConstant' stores its length. To be able "
+                    "a `TensorConstant` stores its length. To be able "
                     "to change the dimension length, pass `mutable=True` when "
                     "registering the dimension via `model.add_coord`, "
                     "or define it via a `pm.MutableData` variable."
                 )
-            else:
+            elif length_tensor.owner is not None:
+                # The dimension was created from a model variable.
+                # Get a handle on the tensor from which this dimension length was
+                # obtained by doing subindexing on the shape as in `.shape[i]`.
+                # Needed to check if it was another shared variable.
+                # TODO: Consider checking that the graph is what we assume it is:
+                #       isinstance(length_tensor.owner.op, Subtensor)
+                #       isinstance(length_tensor.owner.inputs[0].owner.op, Shape)
                 length_belongs_to = length_tensor.owner.inputs[0].owner.inputs[0]
-                if not isinstance(length_belongs_to, SharedVariable):
+                if length_belongs_to is shared_object:
+                    # No surprise it's changing.
+                    pass
+                elif isinstance(length_belongs_to, SharedVariable):
+                    # The dimension is mutable through a SharedVariable other than the one
+                    # being modified, but that other variable was not yet resized. Warn the user to update it.
+                    warnings.warn(
+                        f"You are resizing a variable with dimension '{dname}' which was initialized "
+                        f"as a mutable dimension by another variable ('{length_belongs_to}')."
+                        " Remember to update that variable with the correct shape to avoid shape issues.",
+                        ShapeWarning,
+                        stacklevel=2,
+                    )
+                else:
+                    # The dimension is immutable.
                     raise ShapeError(
                         f"Resizing dimension '{dname}' with values of length {new_length} would lead to incompatibilities, "
                         f"because the dimension was initialized from '{length_belongs_to}' which is not a shared variable. "
@@ -1182,8 +1237,9 @@ def set_data(
                         expected=old_length,
                     )
             if isinstance(length_tensor, ScalarSharedVariable):
-                # Updating the shared variable resizes dependent nodes that use this dimension for their `size`.
-                length_tensor.set_value(new_length)
+                # The dimension is mutable via its own scalar shared variable, but is
+                # not linked to a data variable's shape; update it through `set_dim`.
+                self.set_dim(dname, new_length, coord_values=new_coords)
 
             if new_coords is not None:
                 # Update the registered coord values (also if they were None)
diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py
index a4c2181906..2f8683e9e3 100644
--- a/pymc/tests/test_model.py
+++ b/pymc/tests/test_model.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import unittest
+import warnings
 
 from functools import reduce
 
@@ -37,7 +38,7 @@
 from pymc import Deterministic, Potential
 from pymc.blocking import DictToArrayBijection, RaveledVars
 from pymc.distributions import Normal, transforms
-from pymc.exceptions import ShapeError
+from pymc.exceptions import ShapeError, ShapeWarning
 from pymc.model import Point, ValueGradFunction
 from pymc.tests.helpers import SeededTest
 
@@ -704,25 +705,45 @@ def test_nested_model_coords():
         assert set(m2.RV_dims) < set(m1.RV_dims)
 
 
-def test_shapeerror_from_resize_immutable_dims():
+def test_shapeerror_from_set_data_dimensionality():
+    with pm.Model() as pmodel:
+        pm.MutableData("m", np.ones((3,)), dims="one")
+        with pytest.raises(ValueError, match="must have 1 dimensions"):
+            pmodel.set_data("m", np.ones((3, 4)))
+
+
+def test_shapeerror_from_resize_immutable_dim_from_RV():
     """
     Trying to resize an immutable dimension should raise a ShapeError,
     even if the variable being updated is a SharedVariable and has
     other dimensions that are mutable.
     """
     with pm.Model() as pmodel:
-        a = pm.Normal("a", mu=[1, 2, 3], dims="fixed")
+        pm.Normal("a", mu=[1, 2, 3], dims="fixed")
+        assert isinstance(pmodel.dim_lengths["fixed"], TensorVariable)
 
-        m = pm.MutableData("m", [[1, 2, 3]], dims=("one", "fixed"))
+        pm.MutableData("m", [[1, 2, 3]], dims=("one", "fixed"))
 
         # This is fine because the "fixed" dim is not resized
-        pm.set_data({"m": [[1, 2, 3], [3, 4, 5]]})
+        pmodel.set_data("m", [[1, 2, 3], [3, 4, 5]])
 
     with pytest.raises(ShapeError, match="was initialized from 'a'"):
-        # Can't work because the "fixed" dimension is linked to a constant shape:
+        # Can't work because the "fixed" dimension is linked to a
+        # TensorVariable with constant shape.
         # Note that the new data tries to change both dimensions
-        with pmodel:
-            pm.set_data({"m": [[1, 2], [3, 4]]})
+        pmodel.set_data("m", [[1, 2], [3, 4]])
+
+
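In user code, the immutable-from-an-RV case that the test above guards against looks roughly like this (a sketch mirroring the test; the last call is the one that fails):

    import pymc as pm

    with pm.Model() as model:
        pm.Normal("a", mu=[1, 2, 3], dims="fixed")               # "fixed" is taken from a's constant shape
        pm.MutableData("m", [[1, 2, 3]], dims=("one", "fixed"))  # "one" is defined by m itself

    model.set_data("m", [[1, 2, 3], [3, 4, 5]])  # fine: only the mutable "one" dim grows
    model.set_data("m", [[1, 2], [3, 4]])        # raises ShapeError: "fixed" was initialized from 'a'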
""" with pm.Model() as pmodel: - a = pm.Normal("a", mu=[1, 2, 3], dims="fixed") + pm.Normal("a", mu=[1, 2, 3], dims="fixed") + assert isinstance(pmodel.dim_lengths["fixed"], TensorVariable) - m = pm.MutableData("m", [[1, 2, 3]], dims=("one", "fixed")) + pm.MutableData("m", [[1, 2, 3]], dims=("one", "fixed")) # This is fine because the "fixed" dim is not resized - pm.set_data({"m": [[1, 2, 3], [3, 4, 5]]}) + pmodel.set_data("m", [[1, 2, 3], [3, 4, 5]]) with pytest.raises(ShapeError, match="was initialized from 'a'"): - # Can't work because the "fixed" dimension is linked to a constant shape: + # Can't work because the "fixed" dimension is linked to a + # TensorVariable with constant shape. # Note that the new data tries to change both dimensions - with pmodel: - pm.set_data({"m": [[1, 2], [3, 4]]}) + pmodel.set_data("m", [[1, 2], [3, 4]]) + + +def test_shapeerror_from_resize_immutable_dim_from_coords(): + with pm.Model(coords={"immutable": [1, 2]}) as pmodel: + assert isinstance(pmodel.dim_lengths["immutable"], TensorConstant) + pm.MutableData("m", [1, 2], dims="immutable") + # Data can be changed + pmodel.set_data("m", [3, 4]) + + with pytest.raises(ShapeError, match="`TensorConstant` stores its length"): + # But the length is linked to a TensorConstant + pmodel.set_data("m", [1, 2, 3], coords=dict(immutable=[1, 2, 3])) def test_valueerror_from_resize_without_coords_update(): @@ -765,6 +786,100 @@ def test_add_coord_mutable_kwarg(): assert isinstance(m._dim_lengths["mutable2"], TensorVariable) +def test_set_dim(): + """Test the concious re-sizing of dims created through add_coord().""" + with pm.Model() as pmodel: + pmodel.add_coord("fdim", mutable=False, length=1) + pmodel.add_coord("mdim", mutable=True, length=2) + a = pm.Normal("a", dims="mdim") + assert a.eval().shape == (2,) + + with pytest.raises(ValueError, match="is immutable"): + pmodel.set_dim("fdim", 3) + + pmodel.set_dim("mdim", 3) + assert a.eval().shape == (3,) + + +def test_set_dim_with_coords(): + """Test the concious re-sizing of dims created through add_coord() with coord value.""" + with pm.Model() as pmodel: + pmodel.add_coord("mdim", mutable=True, length=2, values=["A", "B"]) + a = pm.Normal("a", dims="mdim") + assert len(pmodel.coords["mdim"]) == 2 + + with pytest.raises(ValueError, match="has coord values"): + pmodel.set_dim("mdim", new_length=3) + + with pytest.raises(ShapeError, match="does not match"): + pmodel.set_dim("mdim", new_length=3, coord_values=["A", "B"]) + + pmodel.set_dim("mdim", 3, ["A", "B", "C"]) + assert a.eval().shape == (3,) + assert pmodel.coords["mdim"] == ("A", "B", "C") + + +def test_set_data_indirect_resize(): + with pm.Model() as pmodel: + pmodel.add_coord("mdim", mutable=True, length=2) + pm.MutableData("mdata", [1, 2], dims="mdim") + + # First resize the dimension. + pmodel.dim_lengths["mdim"].set_value(3) + # Then change the data. + with warnings.catch_warnings(): + warnings.simplefilter("error") + pmodel.set_data("mdata", [1, 2, 3]) + + # Now the other way around. 
+def test_set_data_warns_on_resize_of_dims_defined_by_other_mutabledata():
+    with pm.Model() as pmodel:
+        pm.MutableData("m1", [1, 2], dims="mutable")
+        pm.MutableData("m2", [3, 4], dims="mutable")
+
+    # Resizing the non-defining variable first gives a warning
+    with pytest.warns(ShapeWarning, match="by another variable"):
+        pmodel.set_data("m2", [4, 5, 6])
+        pmodel.set_data("m1", [1, 2, 3])
+
+    # Resizing the defining variable first is silent
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        pmodel.set_data("m1", [1, 2])
+        pmodel.set_data("m2", [3, 4])
+
+
+def test_set_data_indirect_resize_with_coords():
+    with pm.Model() as pmodel:
+        pmodel.add_coord("mdim", ["A", "B"], mutable=True, length=2)
+        pm.MutableData("mdata", [1, 2], dims="mdim")
+
+    assert pmodel.coords["mdim"] == ("A", "B")
+
+    # First resize the dimension.
+    pmodel.set_dim("mdim", 3, ["A", "B", "C"])
+    assert pmodel.coords["mdim"] == ("A", "B", "C")
+    # Then change the data.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        pmodel.set_data("mdata", [1, 2, 3])
+
+    # Now the other way around.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        pmodel.set_data("mdata", [1, 2, 3, 4], coords=dict(mdim=["A", "B", "C", "D"]))
+    assert pmodel.coords["mdim"] == ("A", "B", "C", "D")
+
+    # This time with incorrectly sized coord values
+    with pytest.raises(ShapeError, match="new coordinate values"):
+        pmodel.set_data("mdata", [1, 2], coords=dict(mdim=[1, 2, 3]))
+
+
 @pytest.mark.parametrize("jacobian", [True, False])
 def test_model_logp(jacobian):
     with pm.Model() as m: