diff --git a/_quarto.yml b/_quarto.yml index d7445354..3e31a4ac 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -74,6 +74,7 @@ quartodoc: contents: - name: Config - name: CustomCheck + - name: RequiredCheck - name: Exclusion - title: Output diff --git a/src/check_datapackage/__init__.py b/src/check_datapackage/__init__.py index 8239c552..5487f8e4 100644 --- a/src/check_datapackage/__init__.py +++ b/src/check_datapackage/__init__.py @@ -2,7 +2,7 @@ from .check import check from .config import Config -from .custom_check import CustomCheck +from .custom_check import CustomCheck, RequiredCheck from .examples import example_package_properties, example_resource_properties from .exclusion import Exclusion from .issue import Issue @@ -13,6 +13,7 @@ "Exclusion", "Issue", "CustomCheck", + "RequiredCheck", "example_package_properties", "example_resource_properties", "check", diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index e9c47b5f..9a888ac8 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -7,7 +7,7 @@ from check_datapackage.config import Config from check_datapackage.constants import DATA_PACKAGE_SCHEMA_PATH, GROUP_ERRORS -from check_datapackage.custom_check import apply_custom_checks +from check_datapackage.custom_check import apply_extensions from check_datapackage.exclusion import exclude from check_datapackage.internals import ( _filter, @@ -44,7 +44,7 @@ class for more details, especially about the default values. _set_should_fields_to_required(schema) issues = _check_object_against_json_schema(properties, schema) - issues += apply_custom_checks(config.custom_checks, properties) + issues += apply_extensions(properties, config.custom_checks) issues = exclude(issues, config.exclusions, properties) return sorted(set(issues)) diff --git a/src/check_datapackage/config.py b/src/check_datapackage/config.py index 7a93a82f..99b77850 100644 --- a/src/check_datapackage/config.py +++ b/src/check_datapackage/config.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from typing import Literal -from check_datapackage.custom_check import CustomCheck +from check_datapackage.custom_check import CustomCheck, RequiredCheck from check_datapackage.exclusion import Exclusion @@ -12,8 +12,8 @@ class Config: Attributes: exclusions (list[Exclusion]): Any issues matching any of Exclusion objects will be excluded (i.e., removed from the output of the check function). - custom_checks (list[CustomCheck]): Custom checks listed here will be done in - addition to checks defined in the Data Package standard. + custom_checks (list[CustomCheck | RequiredCheck]): Custom checks listed here + will be done in addition to checks defined in the Data Package standard. strict (bool): Whether to include "SHOULD" checks in addition to "MUST" checks from the Data Package standard. If True, "SHOULD" checks will also be included. Defaults to False. @@ -31,14 +31,18 @@ class Config: message="Data Packages may only be licensed under MIT.", check=lambda license_name: license_name == "mit", ) + required_title_check = cdp.RequiredCheck( + jsonpath="$.title", + message="A title is required.", + ) config = cdp.Config( exclusions=[exclusion_required], - custom_checks=[license_check], + custom_checks=[license_check, required_title_check], ) ``` """ exclusions: list[Exclusion] = field(default_factory=list) - custom_checks: list[CustomCheck] = field(default_factory=list) + custom_checks: list[CustomCheck | RequiredCheck] = field(default_factory=list) strict: bool = False version: Literal["v1", "v2"] = "v2" diff --git a/src/check_datapackage/custom_check.py b/src/check_datapackage/custom_check.py index 8e55a18a..564e6e15 100644 --- a/src/check_datapackage/custom_check.py +++ b/src/check_datapackage/custom_check.py @@ -1,18 +1,21 @@ +import re from dataclasses import dataclass from typing import Any, Callable from check_datapackage.internals import ( + DescriptorField, _filter, _flat_map, + _get_direct_jsonpaths, _get_fields_at_jsonpath, _map, ) from check_datapackage.issue import Issue -@dataclass +@dataclass(frozen=True) class CustomCheck: - """A custom check to be done on a Data Package descriptor. + """A custom check to be done on Data Package metadata. Attributes: jsonpath (str): The location of the field or fields the custom check applies to, @@ -45,49 +48,101 @@ class CustomCheck: check: Callable[[Any], bool] type: str = "custom" + def apply(self, properties: dict[str, Any]) -> list[Issue]: + """Applies the custom check to the properties. -def apply_custom_checks( - custom_checks: list[CustomCheck], descriptor: dict[str, Any] -) -> list[Issue]: - """Checks the descriptor for all custom checks and creates issues if any fail. + Args: + properties: The properties to check. - Args: - custom_checks: The custom checks to apply to the descriptor. - descriptor: The descriptor to check. + Returns: + A list of `Issue`s. + """ + fields: list[DescriptorField] = _get_fields_at_jsonpath( + self.jsonpath, + properties, + ) + matches: list[DescriptorField] = _filter( + fields, + lambda field: not self.check(field.value), + ) + return _map( + matches, + lambda field: Issue( + jsonpath=field.jsonpath, type=self.type, message=self.message + ), + ) - Returns: - A list of `Issue`s. + +@dataclass(frozen=True) +class RequiredCheck: + """Set a specific property as required. + + Attributes: + jsonpath (str): The location of the field or fields, expressed in [JSON + path](https://jg-rp.github.io/python-jsonpath/syntax/) notation, to which + the check applies (e.g., `$.resources[*].name`). + message (str): The message that is shown when the check fails. + + Examples: + ```{python} + import check_datapackage as cdp + required_title_check = cdp.RequiredCheck( + jsonpath="$.title", + message="A title is required.", + ) + ``` """ - return _flat_map( - custom_checks, - lambda custom_check: _apply_custom_check(custom_check, descriptor), - ) + jsonpath: str + message: str + + def apply(self, properties: dict[str, Any]) -> list[Issue]: + """Applies the required check to the properties. + + Args: + properties: The properties to check. + + Returns: + A list of `Issue`s. + """ + # TODO: check jsonpath when checking other user input + field_name_match = re.search(r"(? list[Issue]: - """Applies the custom check to the descriptor. - If any fields fail the custom check, this function creates a list of issues - for those fields. +def apply_extensions( + properties: dict[str, Any], + # TODO: extensions: Extensions once Extensions implemented + extensions: list[CustomCheck | RequiredCheck], +) -> list[Issue]: + """Applies the extension checks to the properties. Args: - custom_check: The custom check to apply to the descriptor. - descriptor: The descriptor to check. + properties: The properties to check. + extensions: The user-defined extensions to apply to the properties. Returns: A list of `Issue`s. """ - matching_fields = _get_fields_at_jsonpath(custom_check.jsonpath, descriptor) - failed_fields = _filter( - matching_fields, lambda field: not custom_check.check(field.value) - ) - return _map( - failed_fields, - lambda field: Issue( - jsonpath=field.jsonpath, - type=custom_check.type, - message=custom_check.message, - ), + return _flat_map( + extensions, + lambda extension: extension.apply(properties), ) diff --git a/src/check_datapackage/internals.py b/src/check_datapackage/internals.py index a827ef94..0a5b086e 100644 --- a/src/check_datapackage/internals.py +++ b/src/check_datapackage/internals.py @@ -26,6 +26,12 @@ def _get_fields_at_jsonpath( return _map(matches, _create_descriptor_field) +def _get_direct_jsonpaths(jsonpath: str, descriptor: dict[str, Any]) -> list[str]: + """Returns all direct JSON paths that match a direct or indirect JSON path.""" + fields = _get_fields_at_jsonpath(jsonpath, descriptor) + return _map(fields, lambda field: field.jsonpath) + + def _create_descriptor_field(match: JSONPathMatch) -> DescriptorField: return DescriptorField( jsonpath=match.path.replace("['", ".").replace("']", ""), diff --git a/tests/test_custom_check.py b/tests/test_custom_check.py index 690de0d4..45476b31 100644 --- a/tests/test_custom_check.py +++ b/tests/test_custom_check.py @@ -1,6 +1,6 @@ from check_datapackage.check import check from check_datapackage.config import Config -from check_datapackage.custom_check import CustomCheck +from check_datapackage.custom_check import CustomCheck, RequiredCheck from check_datapackage.examples import ( example_package_properties, example_resource_properties, @@ -54,12 +54,24 @@ def test_indirect_jsonpath(): def test_multiple_custom_checks(): - properties = example_package_properties() - properties["name"] = "ALLCAPS" - properties["resources"][0]["name"] = "not starting with woolly" + descriptor = example_package_properties() + descriptor["name"] = "ALLCAPS" + descriptor["resources"][0]["name"] = "not starting with woolly" + del descriptor["version"] + + version_check = RequiredCheck( + jsonpath="$.version", + message="Version is required.", + ) - config = Config(custom_checks=[lowercase_check, resource_name_check]) - issues = check(properties, config=config) + config = Config( + custom_checks=[ + lowercase_check, + resource_name_check, + version_check, + ] + ) + issues = check(descriptor, config=config) assert issues == [ Issue( @@ -72,6 +84,11 @@ def test_multiple_custom_checks(): type=resource_name_check.type, message=resource_name_check.message, ), + Issue( + jsonpath=version_check.jsonpath, + type="required", + message=version_check.message, + ), ] @@ -97,3 +114,44 @@ def test_no_matching_jsonpath(): issues = check(properties, config=config) assert issues == [] + + +def test_required_check_wildcard(): + descriptor = example_package_properties() + id_check = RequiredCheck( + jsonpath="$.*.id", + message="All fields must have an id.", + ) + config = Config(custom_checks=[id_check]) + + issues = check(descriptor, config=config) + + assert len(issues) == 8 + + +def test_required_check_array_wildcard(): + descriptor = example_package_properties() + descriptor["contributors"] = [ + {"path": "a/path"}, + {"path": "a/path"}, + {"path": "a/path", "name": "a name"}, + ] + name_check = RequiredCheck( + jsonpath="$.contributors[*].name", + message="Contributor name is required.", + ) + config = Config(custom_checks=[name_check]) + issues = check(descriptor, config=config) + + assert issues == [ + Issue( + jsonpath="$.contributors[0].name", + type="required", + message=name_check.message, + ), + Issue( + jsonpath="$.contributors[1].name", + type="required", + message=name_check.message, + ), + ]