diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index f7ad45835..38b919c45 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -154,7 +154,7 @@ jobs: # TODO: Flip back to schedule condition before merge PYTEST_ARGS+=" --runslow" - python -m pytest -x --durations=20 $PYTEST_ARGS $COV openff/toolkit/_tests + python -m pytest --durations=20 $PYTEST_ARGS $COV openff/toolkit/_tests - name: Run code snippets in docs if: ${{ matrix.rdkit == true && matrix.openeye == true }} diff --git a/openff/toolkit/_tests/test_parameters.py b/openff/toolkit/_tests/test_parameters.py index bd9ae0836..683cc30bd 100644 --- a/openff/toolkit/_tests/test_parameters.py +++ b/openff/toolkit/_tests/test_parameters.py @@ -822,6 +822,53 @@ class MyParameterHandler(ParameterHandler): ): handler.create_force() + def test_hash(self): + bh = BondHandler(skip_version_check=True, allow_cosmetic_attributes=True) + + hash1 = hash(bh) + # Ensure hash changes when a new parameter is added + bh.add_parameter( + { + "smirks": "[*:1]-[*:2]", + "length": 1 * unit.angstrom, + "k": 10 * unit.kilocalorie / unit.mole / unit.angstrom ** 2, + "id": "b0", + } + ) + hash2 = hash(bh) + assert hash1 != hash2 + + # Ensure hash changes when another parameter that differs only by SMIRKS is added + bh.add_parameter( + { + "smirks": "[C:1]-[C:2]", + "length": 1 * unit.angstrom, + "k": 10 * unit.kilocalorie / unit.mole / unit.angstrom ** 2, + "id": "b0", + } + ) + hash3 = hash(bh) + assert hash2 != hash3 + + bh.add_cosmetic_attribute("fizz", "buzz") + hash3p5 = hash(bh) + assert hash3 != hash3p5 + + # Ensure hash changes when a cosmetic attribute is added + bh.parameters[0].add_cosmetic_attribute("foo", "bar") + hash4 = hash(bh) + assert hash3p5 != hash4 + + # Ensure hash changes when parameters are reordered + param = bh.parameters.pop(0) + bh.parameters.append(param) + hash5 = hash(bh) + assert hash4 != hash5 + + # Ensure hash doesn't change when the contents haven't changed + hash6 = hash(bh) + assert hash5 == hash6 + class TestParameterList: """Test capabilities of ParameterList for accessing and manipulating SMIRNOFF parameter definitions.""" diff --git a/openff/toolkit/typing/engines/smirnoff/forcefield.py b/openff/toolkit/typing/engines/smirnoff/forcefield.py index 2bf7b3af7..678b43619 100644 --- a/openff/toolkit/typing/engines/smirnoff/forcefield.py +++ b/openff/toolkit/typing/engines/smirnoff/forcefield.py @@ -841,8 +841,7 @@ def _to_smirnoff_data(self, discard_cosmetic_attributes: bool = False) -> dict: for parameter_handler in self._parameter_handlers.values(): # If _TAGNAME is None, the default value, an error should have been # thrown upon registering it, so assume it is str here - handler_tag: str = parameter_handler._TAGNAME # type: ignore[assignment] - l1_dict[handler_tag] = parameter_handler.to_dict( + l1_dict[parameter_handler._TAGNAME] = parameter_handler.to_dict( discard_cosmetic_attributes=discard_cosmetic_attributes ) @@ -1099,8 +1098,8 @@ def to_string( smirnoff_data = self._to_smirnoff_data( discard_cosmetic_attributes=discard_cosmetic_attributes ) - string_data = io_handler.to_string(smirnoff_data) - return string_data + + return io_handler.to_string(smirnoff_data) def to_file( self, @@ -1432,13 +1431,13 @@ def __getitem__(self, val: Union[str, ParameterHandler]) -> ParameterHandler: Syntax sugar for lookikng up a ParameterHandler. Note that only string-based lookups are currently supported. """ - if isinstance(val, str): + if type(val) is str: if val in self._parameter_handlers: return self.get_parameter_handler(val) else: raise KeyError(f"Parameter handler with name '{val}' not found.") - elif isinstance(val, ParameterHandler) or issubclass(val, ParameterHandler): - raise NotImplementedError + else: + raise NotImplementedError(f"Lookup by {type(val)=} is not supported.") def __hash__(self) -> int: """Deterministically hash a ForceField object diff --git a/openff/toolkit/typing/engines/smirnoff/io.py b/openff/toolkit/typing/engines/smirnoff/io.py index f6912e269..a373ebe46 100644 --- a/openff/toolkit/typing/engines/smirnoff/io.py +++ b/openff/toolkit/typing/engines/smirnoff/io.py @@ -205,18 +205,18 @@ def prepend_all_keys( A set or list of strings, indicating keys not to prepend in the data structure """ - if isinstance(d, dict): + if type(d) is dict: for key in list(d.keys()): if key in ignore_keys: continue - if isinstance(d[key], list) or isinstance(d[key], dict): + if type(d[key]) in (list, dict): prepend_all_keys(d[key], char=char, ignore_keys=ignore_keys) else: new_key = char + key d[new_key] = d[key] del d[key] prepend_all_keys(d[new_key], char=char, ignore_keys=ignore_keys) - elif isinstance(d, list): + elif type(d) is list: for item in d: prepend_all_keys(item, char=char, ignore_keys=ignore_keys) @@ -224,12 +224,15 @@ def prepend_all_keys( # unless they're prepended by "@" prepend_all_keys(smirnoff_data["SMIRNOFF"], ignore_keys=["Author", "Date"]) - # Reorder parameter sections to put Author and Date at the top (this is the only - # way to change the order of items in a dict, as far as I can tell) - for key, value in list(smirnoff_data["SMIRNOFF"].items()): - if key in ["Author", "Date"]: - continue - del smirnoff_data["SMIRNOFF"][key] - smirnoff_data["SMIRNOFF"][key] = value + # Ensure Author and Date are listed first, necessitates creating a new dict + return_dict = { + "SMIRNOFF": { + "Author": smirnoff_data['SMIRNOFF'].pop("Author", "Author Unknown"), + "Date": smirnoff_data['SMIRNOFF'].pop("Date", "Date Unknown"), + }} - return xmltodict.unparse(smirnoff_data, pretty=True, indent=" " * 4) + return_dict['SMIRNOFF'].update(smirnoff_data['SMIRNOFF']) + + del smirnoff_data + + return xmltodict.unparse(return_dict, pretty=True, indent=" " * 4) diff --git a/openff/toolkit/typing/engines/smirnoff/parameters.py b/openff/toolkit/typing/engines/smirnoff/parameters.py index b42fcba8b..88badc936 100644 --- a/openff/toolkit/typing/engines/smirnoff/parameters.py +++ b/openff/toolkit/typing/engines/smirnoff/parameters.py @@ -345,7 +345,7 @@ def __init__( converter: Optional[Callable] = None, docstring: str = "", ): - if isinstance(unit, str): + if type(unit) is str: # be careful with module & variable names unit = Unit(unit) @@ -1086,37 +1086,38 @@ def to_dict( def __getattr__(self, item): """Take care of mapping indexed attributes to their respective list elements.""" - # Try matching the case where there are two indices - # this indicates a index_mapped parameter - attr_name, index, key = self._split_attribute_index_mapping(item) + if not item.isalpha(): + # Try matching the case where there are two indices + # this indicates a index_mapped parameter + attr_name, index, key = self._split_attribute_index_mapping(item) - # Check if this is an indexed_mapped attribute. - if ( - key is not None - and index is not None - and attr_name in self._get_indexed_mapped_parameter_attributes() - ): - indexed_mapped_attr_value = getattr(self, attr_name) - try: - return indexed_mapped_attr_value[index][key] - except (IndexError, KeyError) as err: - raise MissingIndexedAttributeError( - f"{err!s} '{item}' is out of bounds for indexed attribute '{attr_name}'" - ) + # Check if this is an indexed_mapped attribute. + if ( + key is not None + and index is not None + and attr_name in self._get_indexed_mapped_parameter_attributes() + ): + indexed_mapped_attr_value = getattr(self, attr_name) + try: + return indexed_mapped_attr_value[index][key] + except (IndexError, KeyError) as err: + raise MissingIndexedAttributeError( + f"{err!s} '{item}' is out of bounds for indexed attribute '{attr_name}'" + ) - # Otherwise, try indexed attribute - # Separate the indexed attribute name from the list index. - attr_name, index = self._split_attribute_index(item) + # Otherwise, try indexed attribute + # Separate the indexed attribute name from the list index. + attr_name, index = self._split_attribute_index(item) - # Check if this is an indexed attribute. - if index is not None and attr_name in self._get_indexed_parameter_attributes(): - indexed_attr_value = getattr(self, attr_name) - try: - return indexed_attr_value[index] - except IndexError: - raise MissingIndexedAttributeError( - f"'{item}' is out of bounds for indexed attribute '{attr_name}'" - ) + # Check if this is an indexed attribute. + if index is not None and attr_name in self._get_indexed_parameter_attributes(): + indexed_attr_value = getattr(self, attr_name) + try: + return indexed_attr_value[index] + except IndexError: + raise MissingIndexedAttributeError( + f"'{item}' is out of bounds for indexed attribute '{attr_name}'" + ) # Otherwise, forward the search to the next class in the MRO. try: @@ -1355,33 +1356,33 @@ def filter(x): # sorts the attribute alphabetically by name. Here we want the order # to be the same as the declaration order, which is guaranteed by PEP 520, # starting from the parent class. - parameter_attributes = dict( - (name, descriptor) + return { + name: descriptor for c in reversed(inspect.getmro(cls)) for name, descriptor in c.__dict__.items() if isinstance(descriptor, ParameterAttribute) and filter(descriptor) - ) - return parameter_attributes + } + @classmethod def _get_indexed_mapped_parameter_attributes(cls): """Shortcut to retrieve only IndexedMappedParameterAttributes.""" return cls._get_parameter_attributes( - filter=lambda x: isinstance(x, IndexedMappedParameterAttribute) + filter=lambda x: type(x) is IndexedMappedParameterAttribute ) @classmethod def _get_indexed_parameter_attributes(cls): """Shortcut to retrieve only IndexedParameterAttributes.""" return cls._get_parameter_attributes( - filter=lambda x: isinstance(x, IndexedParameterAttribute) + filter=lambda x: type(x) is IndexedParameterAttribute ) @classmethod def _get_mapped_parameter_attributes(cls): """Shortcut to retrieve only IndexedParameterAttributes.""" return cls._get_parameter_attributes( - filter=lambda x: isinstance(x, MappedParameterAttribute) + filter=lambda x: type(x) is MappedParameterAttribute ) @classmethod @@ -1588,14 +1589,14 @@ def __contains__(self, item): item SMIRKS of item in this ParameterList """ - if isinstance(item, str): + if type(item) is str: # Special case for SMIRKS strings if item in [result.smirks for result in self]: return True # Fall back to traditional access return list.__contains__(self, item) - def to_list(self, discard_cosmetic_attributes=True): + def to_list(self, discard_cosmetic_attributes: bool =True) -> list[dict]: """ Render this ParameterList to a normal list, serializing each ParameterType object in it to dict. @@ -1610,15 +1611,12 @@ def to_list(self, discard_cosmetic_attributes=True): parameter_list :list[dict] A serialized representation of a ParameterList, with each ParameterType it contains converted to dict. """ - parameter_list = list() - - for parameter in self: - parameter_dict = parameter.to_dict( + return [ + parameter.to_dict( discard_cosmetic_attributes=discard_cosmetic_attributes ) - parameter_list.append(parameter_dict) - - return parameter_list + for parameter in self + ] class VirtualSiteParameterList(ParameterList): @@ -1905,6 +1903,31 @@ def __init__( # Initialize ParameterAttributes and cosmetic attributes. super().__init__(allow_cosmetic_attributes=allow_cosmetic_attributes, **kwargs) + def __hash__(self) -> int: + """ + Hash a ParameterHandler and all of its contents (INCLUDING cosmetic attributes). + + This method does not attempt to return the same hash for ParameterHandlers with equivalent + physics/chemistry but different cosmetic attributes or units. Instead this is a hash of all + of the ParameterHandler's contents, even if they don't affect system creation in any way. + """ + import xmltodict + + from openff.toolkit.utils.utils import convert_all_quantities_to_string + + return hash( + xmltodict.unparse( + { + 'ROOT': convert_all_quantities_to_string( + self.to_dict( + discard_cosmetic_attributes=False, + ) + ) + } + ) + ) + + def _add_parameters(self, section_dict, allow_cosmetic_attributes=False): """ Extend the ParameterList in this ParameterHandler using a SMIRNOFF data source. @@ -1925,7 +1948,7 @@ def _add_parameters(self, section_dict, allow_cosmetic_attributes=False): if key != element_name: break # If there are multiple parameters, this will be a list. If there's just one, make it a list - if not (isinstance(val, list)): + if type(val) is not list: val = [val] # If we're reading the parameter list, iterate through and attach units to @@ -3854,13 +3877,12 @@ def _find_matches( assigned_matches_by_parent = self._find_matches_by_parent(entity) return_dict = {} for parent_index, assigned_parameters in assigned_matches_by_parent.items(): - assigned_matches = [] - for assigned_parameter, match_orientations in assigned_parameters: - for match in match_orientations: - assigned_matches.append( - ParameterHandler._Match(assigned_parameter, match) - ) - return_dict[(parent_index,)] = assigned_matches + + return_dict[(parent_index,)] = [ + ParameterHandler._Match(assigned_parameter, match) + for assigned_parameter, match_orientations in assigned_parameters + for match in match_orientations + ] return return_dict diff --git a/openff/toolkit/utils/serialization.py b/openff/toolkit/utils/serialization.py index 31ff5d289..3c9e36bc2 100644 --- a/openff/toolkit/utils/serialization.py +++ b/openff/toolkit/utils/serialization.py @@ -430,13 +430,13 @@ def _contains_bytes(val) -> bool: """Report if any values in list are bytes.""" if val is None: return False - elif isinstance(val, bytes): + elif type(val) is bytes: return True - elif isinstance(val, (int, float, str, bool)): + elif type(val) in (int, float, str, bool): return False - elif isinstance(val, (list, tuple)): + elif type(val) in (list, tuple): return any([_contains_bytes(x) for x in val]) - elif isinstance(val, dict): + elif type(val) is dict: return any([_contains_bytes(x) for x in val.values()]) else: raise ValueError(f"type {val}") @@ -449,20 +449,26 @@ def _prep_numpy_data_for_json(data: dict) -> dict: big_endian_float = np.dtype("float").newbyteorder(">") for key, val in data.items(): - if isinstance(val, np.ndarray): + if type(val) is np.ndarray: data[key] = val.tolist() - if isinstance(val, dict): + if type(val) is dict: data[key] = _prep_numpy_data_for_json(val) - if isinstance(val, bytes): + if type(val) is bytes: data[key] = np.frombuffer(val, dtype=big_endian_float).tolist() - if isinstance(val, (list, tuple)): + if type(val) in (list, tuple): for i, element in enumerate(val): - if isinstance(element, bytes): + if type(element) is bytes: # Handles case of list[np.array], like Molecule.conformers data[key][i] = np.frombuffer( element, dtype=big_endian_float ).tolist() - elif isinstance(element, dict): + elif type(element) is dict: # Handles case of list[Molecule], like Topology.molecules data[key][i] = _prep_numpy_data_for_json(element) + elif type(val) in (int, float, str, bool): + # these types should be fine? + pass + else: + raise ValueError(f"Value {val=} of type {type(val)=} is not supported for serialization") + return data diff --git a/openff/toolkit/utils/utils.py b/openff/toolkit/utils/utils.py index 9821dfad7..a8ba8ad29 100644 --- a/openff/toolkit/utils/utils.py +++ b/openff/toolkit/utils/utils.py @@ -127,13 +127,15 @@ def format_unit_simple(unit, registry, **options): return " * ".join(f"{u} ** {p}" for u, p in unit.items()) +@functools.lru_cache def unit_to_string(input_unit: Unit) -> str: return f"{input_unit:simple}" +@functools.lru_cache def quantity_to_dict(input_quantity): value = input_quantity.magnitude - if isinstance(value, np.ndarray): + if type(value) is np.ndarray: value = value.tolist() return { @@ -163,10 +165,10 @@ def quantity_to_string(input_quantity: Quantity) -> str: The serialized quantity """ - unitless_value: float | int | NDArray | list = input_quantity.m_as(input_quantity.units) + unitless_value: float | int | NDArray | list = input_quantity.m # The string representation of a numpy array doesn't have commas and breaks the # parser, thus we convert any arrays to list here - if isinstance(unitless_value, np.ndarray): + if type(unitless_value) is np.ndarray: unitless_value = list(unitless_value) unit_string = unit_to_string(input_quantity.units) @@ -218,7 +220,7 @@ def string_to_quantity(quantity_string: str) -> Union[int, float, Quantity]: # TODO: Should intentionally unitless array-likes be Quantity objects # or their raw representation? - if quantity.units == _DIMENSIONLESS and isinstance(quantity.m, (int, float)): + if quantity.units == _DIMENSIONLESS and type(quantity.m) in (int, float): return quantity.m else: return quantity @@ -255,7 +257,7 @@ def convert_all_strings_to_quantity( """ from pint import DefinitionSyntaxError - if isinstance(smirnoff_data, dict): + if type(smirnoff_data) is dict: for key, value in smirnoff_data.items(): if key in ignore_keys: smirnoff_data[key] = value @@ -266,7 +268,7 @@ def convert_all_strings_to_quantity( ) obj_to_return = smirnoff_data - elif isinstance(smirnoff_data, list): + elif type(smirnoff_data) is list: for index, item in enumerate(smirnoff_data): smirnoff_data[index] = convert_all_strings_to_quantity( item, @@ -274,7 +276,7 @@ def convert_all_strings_to_quantity( ) obj_to_return = smirnoff_data - elif isinstance(smirnoff_data, int) or isinstance(smirnoff_data, float): + elif type(smirnoff_data) in (int, float): obj_to_return = smirnoff_data else: @@ -323,15 +325,15 @@ def convert_all_quantities_to_string( with ``openff.units.Quantity``s converted to string """ - if isinstance(smirnoff_data, dict): + if type(smirnoff_data) is dict: for key, value in smirnoff_data.items(): smirnoff_data[key] = convert_all_quantities_to_string(value) return smirnoff_data - elif isinstance(smirnoff_data, list): + elif type(smirnoff_data) is list: for index, item in enumerate(smirnoff_data): smirnoff_data[index] = convert_all_quantities_to_string(item) return smirnoff_data - elif isinstance(smirnoff_data, Quantity): + elif type(smirnoff_data) is Quantity: return quantity_to_string(smirnoff_data) else: return smirnoff_data @@ -435,9 +437,9 @@ def deserialize_numpy( np_array The deserialized numpy array """ - if isinstance(serialized_np, list): + if type(serialized_np) is list: np_array = np.array(serialized_np) - if isinstance(serialized_np, bytes): + if type(serialized_np) is bytes: dt = np.dtype("float").newbyteorder(">") np_array = np.frombuffer(serialized_np, dtype=dt) np_array = np_array.reshape(shape) @@ -677,7 +679,7 @@ def recursive_attach_unit_strings(smirnoff_data, units_to_attach): # If we're working with a dict, see if there are any new unit entries and store them, # then operate recursively on the values in the dict. - if isinstance(smirnoff_data, dict): + if type(smirnoff_data) is dict: # Go over all key:value pairs once to see if there are new units to attach. # Note that units to be attached can be defined in the same dict as the # key:value pair they will be attached to, so we need to complete this check @@ -704,7 +706,7 @@ def recursive_attach_unit_strings(smirnoff_data, units_to_attach): ) # If it's a list, operate on each member of the list - elif isinstance(smirnoff_data, list): + elif type(smirnoff_data) is list: for index, value in enumerate(smirnoff_data): smirnoff_data[index] = recursive_attach_unit_strings(value, units_to_attach) @@ -805,10 +807,10 @@ def sort_smirnoff_dict(data): """ sorted_dict = dict() for key, val in sorted(data.items()): - if isinstance(val, dict): + if type(val) is dict: # This should hit each ParameterHandler and dicts within them sorted_dict[key] = sort_smirnoff_dict(val) - elif isinstance(val, list): + elif type(val) is list: # Handle case of ParameterLists, which show up in # the smirnoff dicts as lists of dicts new_parameter_list = list()