pymc/model.py (88 changes: 72 additions & 16 deletions)

@@ -60,7 +60,7 @@
 from pymc.distributions import joint_logpt
 from pymc.distributions.logprob import _get_scaling
 from pymc.distributions.transforms import _default_transform
-from pymc.exceptions import ImputationWarning, SamplingError, ShapeError
+from pymc.exceptions import ImputationWarning, SamplingError, ShapeError, ShapeWarning
 from pymc.initial_point import make_initial_point_fn
 from pymc.math import flatten_list
 from pymc.util import (
@@ -1067,25 +1067,26 @@ def add_coord(
             raise ValueError(
                 f"Either `values` or `length` must be specified for the '{name}' dimension."
             )
-        if isinstance(length, int):
-            length = at.constant(length)
-        elif length is not None and not isinstance(length, Variable):
-            raise ValueError(
-                f"The `length` passed for the '{name}' coord must be an Aesara Variable or None."
-            )
         if values is not None:
             # Conversion to a tuple ensures that the coordinate values are immutable.
             # Also unlike numpy arrays, there's tuple.index(...) which is handy to work with.
             values = tuple(values)
         if name in self.coords:
             if not np.array_equal(values, self.coords[name]):
                 raise ValueError(f"Duplicate and incompatible coordinate: {name}.")
         else:
+            if length is not None and not isinstance(length, (int, Variable)):
+                raise ValueError(
+                    f"The `length` passed for the '{name}' coord must be an int, Aesara Variable or None."
+                )
+            if length is None:
+                length = len(values)
+            if not isinstance(length, Variable):
                 if mutable:
-                    self._dim_lengths[name] = length or aesara.shared(len(values))
+                    length = aesara.shared(length)
                 else:
-                    self._dim_lengths[name] = length or aesara.tensor.constant(len(values))
-            self._coords[name] = values
+                    length = aesara.tensor.constant(length)
+            self._dim_lengths[name] = length
+            self._coords[name] = values
 
     def add_coords(
         self,
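Taken together, the rewrite normalizes `length` up front: an integer length becomes an Aesara shared variable when `mutable=True` (so it can be re-set later) and a constant otherwise. A minimal sketch of the resulting behavior (assuming PyMC v4 with the Aesara backend; the dim names are illustrative):

    import pymc as pm

    with pm.Model() as model:
        model.add_coord("fixed", length=3, mutable=False)  # length stored as a constant
        model.add_coord("flex", length=3, mutable=True)    # length stored as a shared variable
        model.add_coord("named", values=["A", "B"], mutable=True)  # length inferred from values

    # Only a shared (mutable) length can be updated in place:
    model.dim_lengths["flex"].set_value(5)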
@@ -1101,6 +1102,36 @@ def add_coords(
         for name, values in coords.items():
             self.add_coord(name, values, length=lengths.get(name, None))
 
+    def set_dim(self, name: str, new_length: int, coord_values: Optional[Sequence] = None):
+        """Update a mutable dimension.
+
+        Parameters
+        ----------
+        name
+            Name of the dimension.
+        new_length
+            New length of the dimension.
+        coord_values
+            Optional sequence of coordinate values.
+        """
+        if not isinstance(self.dim_lengths[name], ScalarSharedVariable):
+            raise ValueError(f"The dimension '{name}' is immutable.")
+        if coord_values is None and self.coords.get(name, None) is not None:
+            raise ValueError(
+                f"'{name}' has coord values. Pass `set_dim(..., coord_values=...)` to update them."
+            )
+        if coord_values is not None:
+            len_cvals = len(coord_values)
+            if len_cvals != new_length:
+                raise ShapeError(
+                    "Length of new coordinate values does not match the new dimension length.",
+                    actual=len_cvals,
+                    expected=new_length,
+                )
+            self._coords[name] = tuple(coord_values)
+        self.dim_lengths[name].set_value(new_length)
+        return
+
     def set_data(
         self,
         name: str,
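The new `set_dim` method is the public entry point for such resizes: it rejects immutable dims, demands fresh coord values when the dim has them, and finally updates the shared length variable. A usage sketch mirroring the tests below (assuming PyMC v4; names are illustrative):

    import pymc as pm

    with pm.Model() as model:
        model.add_coord("year", values=[2020, 2021], mutable=True)
        x = pm.Normal("x", dims="year")

    model.set_dim("year", new_length=3, coord_values=[2020, 2021, 2022])
    assert x.eval().shape == (3,)
    assert model.coords["year"] == (2020, 2021, 2022)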
@@ -1163,16 +1194,40 @@ def set_data(

                         f"{new_length}, so new coord values for the {dname} dimension are required."
                     )
             if isinstance(length_tensor, TensorConstant):
+                # The dimension was fixed in length.
+                # Resizing a data variable in this dimension would
+                # definitely lead to shape problems.
                 raise ShapeError(
                     f"Resizing dimension '{dname}' is impossible, because "
-                    "a 'TensorConstant' stores its length. To be able "
+                    "a `TensorConstant` stores its length. To be able "
                     "to change the dimension length, pass `mutable=True` when "
                     "registering the dimension via `model.add_coord`, "
                     "or define it via a `pm.MutableData` variable."
                 )
-            else:
+            elif length_tensor.owner is not None:
+                # The dimension was created from a model variable.
                 # Get a handle on the tensor from which this dimension length was
                 # obtained by doing subindexing on the shape as in `.shape[i]`.
                 # Needed to check if it was another shared variable.
+                # TODO: Consider checking the graph is what we are assuming it is
+                # isinstance(length_tensor.owner.op, Subtensor)
+                # isinstance(length_tensor.owner.inputs[0].owner.op, Shape)
                 length_belongs_to = length_tensor.owner.inputs[0].owner.inputs[0]
-                if not isinstance(length_belongs_to, SharedVariable):
+                if length_belongs_to is shared_object:
+                    # No surprise it's changing.
+                    pass
+                elif isinstance(length_belongs_to, SharedVariable):
+                    # The dimension is mutable through a SharedVariable other than the one being modified.
+                    # But the other variable was not yet re-sized! Warn the user to do that!
+                    warnings.warn(
+                        f"You are resizing a variable with dimension '{dname}' which was initialized "
+                        f"as a mutable dimension by another variable ('{length_belongs_to}')."
+                        " Remember to update that variable with the correct shape to avoid shape issues.",
+                        ShapeWarning,
+                        stacklevel=2,
+                    )
+                else:
+                    # The dimension is immutable.
                     raise ShapeError(
                         f"Resizing dimension '{dname}' with values of length {new_length} would lead to incompatibilities, "
                         f"because the dimension was initialized from '{length_belongs_to}' which is not a shared variable. "

@@ -1182,8 +1237,9 @@

                         actual=new_length,
                         expected=old_length,
                     )
             if isinstance(length_tensor, ScalarSharedVariable):
-                # Updating the shared variable resizes dependent nodes that use this dimension for their `size`.
-                length_tensor.set_value(new_length)
+                # The dimension is mutable, but was defined without being linked
+                # to a shared variable. This is allowed, but a little less robust.
+                self.set_dim(dname, new_length, coord_values=new_coords)
 
         if new_coords is not None:
             # Update the registered coord values (also if they were None)
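The new `elif length_tensor.owner is not None` branch covers dims whose length was taken from another variable's `.shape[i]`: resizing a different variable on that dim now emits a `ShapeWarning` instead of silently leaving the two out of sync. A sketch of that behavior (assuming PyMC v4; variable names are illustrative):

    import pymc as pm

    with pm.Model() as model:
        pm.MutableData("m1", [1, 2], dims="obs")  # "obs" takes its length from m1
        pm.MutableData("m2", [3, 4], dims="obs")

    model.set_data("m2", [4, 5, 6])  # warns: "obs" is defined by m1, which still has length 2
    model.set_data("m1", [1, 2, 3])  # brings m1 back in sync; resizing m1 first stays silent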
pymc/tests/test_model.py (131 changes: 123 additions & 8 deletions)

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import unittest
+import warnings
 
 from functools import reduce
 
@@ -37,7 +38,7 @@
 from pymc import Deterministic, Potential
 from pymc.blocking import DictToArrayBijection, RaveledVars
 from pymc.distributions import Normal, transforms
-from pymc.exceptions import ShapeError
+from pymc.exceptions import ShapeError, ShapeWarning
 from pymc.model import Point, ValueGradFunction
 from pymc.tests.helpers import SeededTest
 
@@ -704,25 +705,45 @@ def test_nested_model_coords():
     assert set(m2.RV_dims) < set(m1.RV_dims)
 
 
-def test_shapeerror_from_resize_immutable_dims():
+def test_shapeerror_from_set_data_dimensionality():
     with pm.Model() as pmodel:
+        pm.MutableData("m", np.ones((3,)), dims="one")
+        with pytest.raises(ValueError, match="must have 1 dimensions"):
+            pmodel.set_data("m", np.ones((3, 4)))
+
+
+def test_shapeerror_from_resize_immutable_dim_from_RV():
+    """
+    Trying to resize an immutable dimension should raise a ShapeError,
+    even if the variable being updated is a SharedVariable and has other
+    dimensions that are mutable.
+    """
+    with pm.Model() as pmodel:
-        a = pm.Normal("a", mu=[1, 2, 3], dims="fixed")
+        pm.Normal("a", mu=[1, 2, 3], dims="fixed")
+        assert isinstance(pmodel.dim_lengths["fixed"], TensorVariable)
 
-        m = pm.MutableData("m", [[1, 2, 3]], dims=("one", "fixed"))
+        pm.MutableData("m", [[1, 2, 3]], dims=("one", "fixed"))
 
         # This is fine because the "fixed" dim is not resized
-        pm.set_data({"m": [[1, 2, 3], [3, 4, 5]]})
+        pmodel.set_data("m", [[1, 2, 3], [3, 4, 5]])
 
     with pytest.raises(ShapeError, match="was initialized from 'a'"):
-        # Can't work because the "fixed" dimension is linked to a constant shape:
+        # Can't work because the "fixed" dimension is linked to a
+        # TensorVariable with constant shape.
         # Note that the new data tries to change both dimensions
         with pmodel:
-            pm.set_data({"m": [[1, 2], [3, 4]]})
+            pmodel.set_data("m", [[1, 2], [3, 4]])
+
+
+def test_shapeerror_from_resize_immutable_dim_from_coords():
+    with pm.Model(coords={"immutable": [1, 2]}) as pmodel:
+        assert isinstance(pmodel.dim_lengths["immutable"], TensorConstant)
+        pm.MutableData("m", [1, 2], dims="immutable")
+        # Data can be changed
+        pmodel.set_data("m", [3, 4])
+
+    with pytest.raises(ShapeError, match="`TensorConstant` stores its length"):
+        # But the length is linked to a TensorConstant
+        pmodel.set_data("m", [1, 2, 3], coords=dict(immutable=[1, 2, 3]))
 
 
 def test_valueerror_from_resize_without_coords_update():
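These two tests pin down the ways a dim can end up immutable: a length registered via `coords=...` is stored as a `TensorConstant`, while a dim introduced by a random variable is a symbolic `TensorVariable` derived from the RV's shape. Neither is a shared variable, so neither can be resized. A small illustration (assuming PyMC v4; names are illustrative):

    import pymc as pm

    with pm.Model(coords={"city": ["A", "B"]}) as model:
        pm.Normal("x", mu=[0.0, 1.0, 2.0], dims="rv_dim")

    print(model.dim_lengths["city"])    # a TensorConstant with value 2
    print(model.dim_lengths["rv_dim"])  # a TensorVariable, i.e. x's first shape entry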
@@ -765,6 +786,100 @@ def test_add_coord_mutable_kwarg():
     assert isinstance(m._dim_lengths["mutable2"], TensorVariable)
 
 
+def test_set_dim():
+    """Test the conscious re-sizing of dims created through add_coord()."""
+    with pm.Model() as pmodel:
+        pmodel.add_coord("fdim", mutable=False, length=1)
+        pmodel.add_coord("mdim", mutable=True, length=2)
+        a = pm.Normal("a", dims="mdim")
+    assert a.eval().shape == (2,)
+
+    with pytest.raises(ValueError, match="is immutable"):
+        pmodel.set_dim("fdim", 3)
+
+    pmodel.set_dim("mdim", 3)
+    assert a.eval().shape == (3,)
+
+
+def test_set_dim_with_coords():
+    """Test the conscious re-sizing of dims created through add_coord() with coord values."""
+    with pm.Model() as pmodel:
+        pmodel.add_coord("mdim", mutable=True, length=2, values=["A", "B"])
+        a = pm.Normal("a", dims="mdim")
+    assert len(pmodel.coords["mdim"]) == 2
+
+    with pytest.raises(ValueError, match="has coord values"):
+        pmodel.set_dim("mdim", new_length=3)
+
+    with pytest.raises(ShapeError, match="does not match"):
+        pmodel.set_dim("mdim", new_length=3, coord_values=["A", "B"])
+
+    pmodel.set_dim("mdim", 3, ["A", "B", "C"])
+    assert a.eval().shape == (3,)
+    assert pmodel.coords["mdim"] == ("A", "B", "C")
+
+
+def test_set_data_indirect_resize():
+    with pm.Model() as pmodel:
+        pmodel.add_coord("mdim", mutable=True, length=2)
+        pm.MutableData("mdata", [1, 2], dims="mdim")
+
+    # First resize the dimension.
+    pmodel.dim_lengths["mdim"].set_value(3)
+    # Then change the data.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        pmodel.set_data("mdata", [1, 2, 3])
+
+    # Now the other way around.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        pmodel.set_data("mdata", [1, 2, 3, 4])
+
+
+def test_set_data_warns_on_resize_of_dims_defined_by_other_mutabledata():
+    with pm.Model() as pmodel:
+        pm.MutableData("m1", [1, 2], dims="mutable")
+        pm.MutableData("m2", [3, 4], dims="mutable")
+
+    # Resizing the non-defining variable first gives a warning
+    with pytest.warns(ShapeWarning, match="by another variable"):
+        pmodel.set_data("m2", [4, 5, 6])
+        pmodel.set_data("m1", [1, 2, 3])
+
+    # Resizing the defining variable first is silent
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        pmodel.set_data("m1", [1, 2])
+        pmodel.set_data("m2", [3, 4])
+
+
+def test_set_data_indirect_resize_with_coords():
+    with pm.Model() as pmodel:
+        pmodel.add_coord("mdim", ["A", "B"], mutable=True, length=2)
+        pm.MutableData("mdata", [1, 2], dims="mdim")
+
+    assert pmodel.coords["mdim"] == ("A", "B")
+
+    # First resize the dimension.
+    pmodel.set_dim("mdim", 3, ["A", "B", "C"])
+    assert pmodel.coords["mdim"] == ("A", "B", "C")
+    # Then change the data.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        pmodel.set_data("mdata", [1, 2, 3])
+
+    # Now the other way around.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        pmodel.set_data("mdata", [1, 2, 3, 4], coords=dict(mdim=["A", "B", "C", "D"]))
+    assert pmodel.coords["mdim"] == ("A", "B", "C", "D")
+
+    # This time with incorrectly sized coord values
+    with pytest.raises(ShapeError, match="new coordinate values"):
+        pmodel.set_data("mdata", [1, 2], coords=dict(mdim=[1, 2, 3]))
+
+
 @pytest.mark.parametrize("jacobian", [True, False])
 def test_model_logp(jacobian):
     with pm.Model() as m: