From 20ced703ce27ac085812680ab8ee889cf7a2c7ce Mon Sep 17 00:00:00 2001 From: martonvago Date: Mon, 13 Oct 2025 13:57:41 +0100 Subject: [PATCH 01/13] feat: :sparkles: add RequiredRule --- _quarto.yml | 1 + src/check_datapackage/__init__.py | 3 +- src/check_datapackage/rule.py | 110 ++++++++++++++++++++++++------ tests/test_rule.py | 59 +++++++++++++++- 4 files changed, 149 insertions(+), 24 deletions(-) diff --git a/_quarto.yml b/_quarto.yml index 7ffc3f86..22322130 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -70,6 +70,7 @@ quartodoc: - name: Config - name: Exclude - name: Rule + - name: RequiredRule - name: Issue - name: example_package_descriptor - name: example_resource_descriptor diff --git a/src/check_datapackage/__init__.py b/src/check_datapackage/__init__.py index 2b8b95ef..73a7e306 100644 --- a/src/check_datapackage/__init__.py +++ b/src/check_datapackage/__init__.py @@ -6,13 +6,14 @@ from .exclude import Exclude from .issue import Issue from .read_json import read_json -from .rule import Rule +from .rule import RequiredRule, Rule __all__ = [ "Config", "Exclude", "Issue", "Rule", + "RequiredRule", "example_package_descriptor", "example_resource_descriptor", "check", diff --git a/src/check_datapackage/rule.py b/src/check_datapackage/rule.py index 76898bd9..c3a974e7 100644 --- a/src/check_datapackage/rule.py +++ b/src/check_datapackage/rule.py @@ -1,7 +1,9 @@ +import re from dataclasses import dataclass from typing import Any, Callable from check_datapackage.internals import ( + DescriptorField, _filter, _flat_map, _get_fields_at_jsonpath, @@ -10,7 +12,7 @@ from check_datapackage.issue import Issue -@dataclass +@dataclass(frozen=True) class Rule: """A custom check to be done on a Data Package descriptor. @@ -43,34 +45,84 @@ class Rule: check: Callable[[Any], bool] type: str = "custom" + def apply(self, descriptor: dict[str, Any]) -> list[Issue]: + """Checks the descriptor against this rule and creates issues on failure. -def apply_rules(rules: list[Rule], descriptor: dict[str, Any]) -> list[Issue]: - """Checks the descriptor for all rules and creates issues for fields that fail. + Args: + descriptor: The descriptor to check. - Args: - rules: The rules to apply to the descriptor. - descriptor: The descriptor to check. + Returns: + A list of `Issue`s. + """ + matching_fields = _get_fields_at_jsonpath(self.jsonpath, descriptor) + return _get_issues(self, matching_fields) - Returns: - A list of `Issue`s. + +class RequiredRule(Rule): + """A rule that checks that a field is present (i.e. not None). + + Attributes: + jsonpath (str): The location of the field or fields, expressed in [JSON + path](https://jg-rp.github.io/python-jsonpath/syntax/) notation, to which + the rule applies (e.g., `$.resources[*].name`). + message (str): The message that is shown when the rule is violated. + + Examples: + ```{python} + import check_datapackage as cdp + required_title_rule = cdp.RequiredRule( + jsonpath="$.title", + message="A title is required.", + ) + ``` """ - return _flat_map( - rules, - lambda rule: _apply_rule(rule, descriptor), - ) + _field_name: str -def _apply_rule(rule: Rule, descriptor: dict[str, Any]) -> list[Issue]: - """Checks the descriptor against the rule and creates issues for fields that fail. + def __init__(self, jsonpath: str, message: str): + """Initializes the `RequiredRule`.""" + field_name_match = re.search(r"\.(\w+)$", jsonpath) + if not field_name_match: + raise ValueError( + "A `RequiredRule` must point to an object field that is not an array" + " item, e.g., `$.title` or `$.resources[*].name`." + ) - Args: - rule: The rule to apply to the descriptor. - descriptor: The descriptor to check. + self._field_name = field_name_match.group(1) + super().__init__( + jsonpath=jsonpath, + message=message, + check=lambda value: value is not None, + type="required", + ) - Returns: - A list of `Issue`s. - """ - matching_fields = _get_fields_at_jsonpath(rule.jsonpath, descriptor) + def apply(self, descriptor: dict[str, Any]) -> list[Issue]: + """Checks the descriptor against this rule and creates issues on failure. + + Args: + descriptor: The descriptor to check. + + Returns: + A list of `Issue`s. + """ + matching_fields = _get_fields_at_jsonpath(self.jsonpath, descriptor) + matching_paths = _map(matching_fields, lambda field: field.jsonpath) + parent_path = self.jsonpath.rstrip(f".{self._field_name}") + matching_parents = _get_fields_at_jsonpath(parent_path, descriptor) + parent_paths = _map( + matching_parents, lambda parent: f"{parent.jsonpath}.{self._field_name}" + ) + missing_paths = _filter(parent_paths, lambda path: path not in matching_paths) + missing_fields = _map( + missing_paths, + lambda path: DescriptorField(jsonpath=path, value=None), + ) + + return _get_issues(self, matching_fields + missing_fields) + + +def _get_issues(rule: Rule, matching_fields: list[DescriptorField]) -> list[Issue]: + """Checks matching fields against the rule and creates issues on failure.""" failed_fields = _filter(matching_fields, lambda field: not rule.check(field.value)) return _map( failed_fields, @@ -78,3 +130,19 @@ def _apply_rule(rule: Rule, descriptor: dict[str, Any]) -> list[Issue]: jsonpath=field.jsonpath, type=rule.type, message=rule.message ), ) + + +def apply_rules(rules: list[Rule], descriptor: dict[str, Any]) -> list[Issue]: + """Checks the descriptor for all rules and creates issues for fields that fail. + + Args: + rules: The rules to apply to the descriptor. + descriptor: The descriptor to check. + + Returns: + A list of `Issue`s. + """ + return _flat_map( + rules, + lambda rule: rule.apply(descriptor), + ) diff --git a/tests/test_rule.py b/tests/test_rule.py index f8330fd0..af80b7cd 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -1,3 +1,5 @@ +from pytest import raises + from check_datapackage.check import check from check_datapackage.config import Config from check_datapackage.examples import ( @@ -5,7 +7,7 @@ example_resource_descriptor, ) from check_datapackage.issue import Issue -from check_datapackage.rule import Rule +from check_datapackage.rule import RequiredRule, Rule lowercase_rule = Rule( jsonpath="$.name", @@ -57,8 +59,20 @@ def test_multiple_rules(): descriptor = example_package_descriptor() descriptor["name"] = "ALLCAPS" descriptor["resources"][0]["name"] = "not starting with woolly" + del descriptor["version"] + + version_rule = RequiredRule( + jsonpath="$.version", + message="Version is required.", + ) - config = Config(rules=[lowercase_rule, resource_name_rule]) + config = Config( + rules=[ + lowercase_rule, + resource_name_rule, + version_rule, + ] + ) issues = check(descriptor, config=config) assert issues == [ @@ -72,6 +86,11 @@ def test_multiple_rules(): type=resource_name_rule.type, message=resource_name_rule.message, ), + Issue( + jsonpath=version_rule.jsonpath, + type="required", + message=version_rule.message, + ), ] @@ -97,3 +116,39 @@ def test_no_matching_jsonpath(): issues = check(descriptor, config=config) assert issues == [] + + +def test_required_rule_indirect_jsonpath(): + descriptor = example_package_descriptor() + descriptor["contributors"] = [ + {"path": "a/path"}, + {"path": "a/path"}, + {"path": "a/path", "name": "a name"}, + ] + rule = RequiredRule( + jsonpath="$.contributors[*].name", + message="Contributor name is required.", + ) + config = Config(rules=[rule]) + issues = check(descriptor, config=config) + + assert issues == [ + Issue( + jsonpath="$.contributors[0].name", + type=rule.type, + message=rule.message, + ), + Issue( + jsonpath="$.contributors[1].name", + type=rule.type, + message=rule.message, + ), + ] + + +def test_required_rule_cannot_apply_to_array_item(): + with raises(ValueError): + RequiredRule( + jsonpath="$.resources[*]", + message="This should fail.", + ) From 520f04a8eeb5445b627670246f57d3a62171013e Mon Sep 17 00:00:00 2001 From: martonvago Date: Tue, 14 Oct 2025 10:15:47 +0100 Subject: [PATCH 02/13] refactor: :recycle: simplify apply logic --- src/check_datapackage/internals.py | 6 ++++ src/check_datapackage/rule.py | 47 +++++++++++++++--------------- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/check_datapackage/internals.py b/src/check_datapackage/internals.py index 1bfba88c..cbd7acdf 100644 --- a/src/check_datapackage/internals.py +++ b/src/check_datapackage/internals.py @@ -68,6 +68,12 @@ def _get_fields_at_jsonpath( return _map(matches, _create_descriptor_field) +def _get_direct_jsonpaths(jsonpath: str, descriptor: dict[str, Any]) -> list[str]: + """Returns all direct JSON paths that match a direct or indirect JSON path.""" + fields = _get_fields_at_jsonpath(jsonpath, descriptor) + return _map(fields, lambda field: field.jsonpath) + + def _create_descriptor_field(match: JSONPathMatch) -> DescriptorField: return DescriptorField( jsonpath=match.path.replace("['", ".").replace("']", ""), diff --git a/src/check_datapackage/rule.py b/src/check_datapackage/rule.py index c3a974e7..0e749f64 100644 --- a/src/check_datapackage/rule.py +++ b/src/check_datapackage/rule.py @@ -6,6 +6,7 @@ DescriptorField, _filter, _flat_map, + _get_direct_jsonpaths, _get_fields_at_jsonpath, _map, ) @@ -55,7 +56,15 @@ def apply(self, descriptor: dict[str, Any]) -> list[Issue]: A list of `Issue`s. """ matching_fields = _get_fields_at_jsonpath(self.jsonpath, descriptor) - return _get_issues(self, matching_fields) + failed_fields = _filter( + matching_fields, lambda field: not self.check(field.value) + ) + return _map( + failed_fields, + lambda field: Issue( + jsonpath=field.jsonpath, type=self.type, message=self.message + ), + ) class RequiredRule(Rule): @@ -81,7 +90,7 @@ class RequiredRule(Rule): def __init__(self, jsonpath: str, message: str): """Initializes the `RequiredRule`.""" - field_name_match = re.search(r"\.(\w+)$", jsonpath) + field_name_match = re.search(r"(\.\w+)$", jsonpath) if not field_name_match: raise ValueError( "A `RequiredRule` must point to an object field that is not an array" @@ -105,32 +114,22 @@ def apply(self, descriptor: dict[str, Any]) -> list[Issue]: Returns: A list of `Issue`s. """ - matching_fields = _get_fields_at_jsonpath(self.jsonpath, descriptor) - matching_paths = _map(matching_fields, lambda field: field.jsonpath) - parent_path = self.jsonpath.rstrip(f".{self._field_name}") - matching_parents = _get_fields_at_jsonpath(parent_path, descriptor) - parent_paths = _map( - matching_parents, lambda parent: f"{parent.jsonpath}.{self._field_name}" + matching_paths = _get_direct_jsonpaths(self.jsonpath, descriptor) + indirect_parent_path = self.jsonpath.rstrip(self._field_name) + direct_parent_paths = _get_direct_jsonpaths(indirect_parent_path, descriptor) + missing_paths = _filter( + direct_parent_paths, + lambda path: f"{path}{self._field_name}" not in matching_paths, ) - missing_paths = _filter(parent_paths, lambda path: path not in matching_paths) - missing_fields = _map( + return _map( missing_paths, - lambda path: DescriptorField(jsonpath=path, value=None), + lambda path: Issue( + jsonpath=path + self._field_name, + type=self.type, + message=self.message, + ), ) - return _get_issues(self, matching_fields + missing_fields) - - -def _get_issues(rule: Rule, matching_fields: list[DescriptorField]) -> list[Issue]: - """Checks matching fields against the rule and creates issues on failure.""" - failed_fields = _filter(matching_fields, lambda field: not rule.check(field.value)) - return _map( - failed_fields, - lambda field: Issue( - jsonpath=field.jsonpath, type=rule.type, message=rule.message - ), - ) - def apply_rules(rules: list[Rule], descriptor: dict[str, Any]) -> list[Issue]: """Checks the descriptor for all rules and creates issues for fields that fail. From 3b8a7c3be8ee3461f54c63db08398e57a4c57276 Mon Sep 17 00:00:00 2001 From: martonvago Date: Tue, 14 Oct 2025 10:26:34 +0100 Subject: [PATCH 03/13] fix: :bug: remove unused import --- src/check_datapackage/rule.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/check_datapackage/rule.py b/src/check_datapackage/rule.py index 0e749f64..af93e6e6 100644 --- a/src/check_datapackage/rule.py +++ b/src/check_datapackage/rule.py @@ -3,7 +3,6 @@ from typing import Any, Callable from check_datapackage.internals import ( - DescriptorField, _filter, _flat_map, _get_direct_jsonpaths, From e0f3ac9b656f980d3cc2c8a4f351387a1284edfc Mon Sep 17 00:00:00 2001 From: martonvago Date: Tue, 14 Oct 2025 12:18:23 +0100 Subject: [PATCH 04/13] fix: :bug: disallow all ambiguous JSON paths --- src/check_datapackage/rule.py | 11 +++++++---- tests/test_rule.py | 33 +++++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/check_datapackage/rule.py b/src/check_datapackage/rule.py index af93e6e6..ae558263 100644 --- a/src/check_datapackage/rule.py +++ b/src/check_datapackage/rule.py @@ -89,11 +89,14 @@ class RequiredRule(Rule): def __init__(self, jsonpath: str, message: str): """Initializes the `RequiredRule`.""" - field_name_match = re.search(r"(\.\w+)$", jsonpath) + field_name_match = re.search(r"(? list[Issue]: A list of `Issue`s. """ matching_paths = _get_direct_jsonpaths(self.jsonpath, descriptor) - indirect_parent_path = self.jsonpath.rstrip(self._field_name) + indirect_parent_path = self.jsonpath.removesuffix(self._field_name) direct_parent_paths = _get_direct_jsonpaths(indirect_parent_path, descriptor) missing_paths = _filter( direct_parent_paths, diff --git a/tests/test_rule.py b/tests/test_rule.py index af80b7cd..e53eb41d 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -1,4 +1,4 @@ -from pytest import raises +from pytest import mark, raises from check_datapackage.check import check from check_datapackage.config import Config @@ -118,7 +118,20 @@ def test_no_matching_jsonpath(): assert issues == [] -def test_required_rule_indirect_jsonpath(): +def test_required_rule_wildcard(): + descriptor = example_package_descriptor() + rule = RequiredRule( + jsonpath="$.*.id", + message="All fields must have an id.", + ) + config = Config(rules=[rule]) + + issues = check(descriptor, config=config) + + assert len(issues) == 8 + + +def test_required_rule_array_wildcard(): descriptor = example_package_descriptor() descriptor["contributors"] = [ {"path": "a/path"}, @@ -146,9 +159,21 @@ def test_required_rule_indirect_jsonpath(): ] -def test_required_rule_cannot_apply_to_array_item(): +@mark.parametrize( + "jsonpath", + [ + "$", + "..*", + "created", + "$..path", + "..resources", + "$.resources[0].*", + "$.resources[*]", + ], +) +def test_required_rule_cannot_apply_to_ambiguous_path(jsonpath): with raises(ValueError): RequiredRule( - jsonpath="$.resources[*]", + jsonpath=jsonpath, message="This should fail.", ) From 3cb33cf5ceedf55e7d294fa8965c35d9f4be9e43 Mon Sep 17 00:00:00 2001 From: Marton Vago Date: Tue, 28 Oct 2025 08:47:51 +0000 Subject: [PATCH 05/13] refactor: :recycle: make RequiredCheck not inherit from CustomCheck --- src/check_datapackage/config.py | 4 ++-- src/check_datapackage/custom_check.py | 33 ++++++++++++++++++--------- tests/test_custom_check.py | 4 ++-- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/check_datapackage/config.py b/src/check_datapackage/config.py index 7a93a82f..16a4e8d8 100644 --- a/src/check_datapackage/config.py +++ b/src/check_datapackage/config.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from typing import Literal -from check_datapackage.custom_check import CustomCheck +from check_datapackage.custom_check import SupportsApply from check_datapackage.exclusion import Exclusion @@ -39,6 +39,6 @@ class Config: """ exclusions: list[Exclusion] = field(default_factory=list) - custom_checks: list[CustomCheck] = field(default_factory=list) + custom_checks: list[SupportsApply] = field(default_factory=list) strict: bool = False version: Literal["v1", "v2"] = "v2" diff --git a/src/check_datapackage/custom_check.py b/src/check_datapackage/custom_check.py index 10932237..b05dd1f2 100644 --- a/src/check_datapackage/custom_check.py +++ b/src/check_datapackage/custom_check.py @@ -1,6 +1,6 @@ import re -from dataclasses import dataclass -from typing import Any, Callable +from dataclasses import dataclass, field +from typing import Any, Callable, Protocol, Sequence from check_datapackage.internals import ( _filter, @@ -12,6 +12,20 @@ from check_datapackage.issue import Issue +class SupportsApply(Protocol): + """A protocol for classes implementing an `apply()` method.""" + def apply(self, descriptor: dict[str, Any]) -> list[Issue]: + """Applies the check to the descriptor and creates issues on failure. + + Args: + descriptor: The descriptor to check. + + Returns: + A list of `Issue`s. + """ + ... + + @dataclass(frozen=True) class CustomCheck: """A custom check to be done on a Data Package descriptor. @@ -68,7 +82,7 @@ def apply(self, descriptor: dict[str, Any]) -> list[Issue]: ) -class RequiredCheck(CustomCheck): +class RequiredCheck: """A custom check that checks that a field is present (i.e. not None). Attributes: @@ -102,12 +116,9 @@ def __init__(self, jsonpath: str, message: str): ) self._field_name = field_name_match.group(1) - super().__init__( - jsonpath=jsonpath, - message=message, - check=lambda value: value is not None, - type="required", - ) + self.jsonpath = jsonpath + self.message = message + def apply(self, descriptor: dict[str, Any]) -> list[Issue]: """Checks the descriptor against this check and creates issues on failure. @@ -129,14 +140,14 @@ def apply(self, descriptor: dict[str, Any]) -> list[Issue]: missing_paths, lambda path: Issue( jsonpath=path + self._field_name, - type=self.type, + type="required", message=self.message, ), ) def apply_custom_checks( - custom_checks: list[CustomCheck], descriptor: dict[str, Any] + custom_checks: Sequence[SupportsApply], descriptor: dict[str, Any] ) -> list[Issue]: """Checks the descriptor for all custom checks and creates issues if any fail. diff --git a/tests/test_custom_check.py b/tests/test_custom_check.py index 839f570c..4321dc01 100644 --- a/tests/test_custom_check.py +++ b/tests/test_custom_check.py @@ -148,12 +148,12 @@ def test_required_check_array_wildcard(): assert issues == [ Issue( jsonpath="$.contributors[0].name", - type=name_check.type, + type="required", message=name_check.message, ), Issue( jsonpath="$.contributors[1].name", - type=name_check.type, + type="required", message=name_check.message, ), ] From 160d764b50caa0d632f6ecd23d8be833a2070655 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Oct 2025 08:49:23 +0000 Subject: [PATCH 06/13] chore(pre-commit): :pencil2: automatic fixes --- src/check_datapackage/custom_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/check_datapackage/custom_check.py b/src/check_datapackage/custom_check.py index b05dd1f2..ea605a9e 100644 --- a/src/check_datapackage/custom_check.py +++ b/src/check_datapackage/custom_check.py @@ -118,7 +118,7 @@ def __init__(self, jsonpath: str, message: str): self._field_name = field_name_match.group(1) self.jsonpath = jsonpath self.message = message - + def apply(self, descriptor: dict[str, Any]) -> list[Issue]: """Checks the descriptor against this check and creates issues on failure. From 7270e9b6b11c3af5ce52902d7a5aae0554db242a Mon Sep 17 00:00:00 2001 From: Marton Vago Date: Tue, 28 Oct 2025 09:02:55 +0000 Subject: [PATCH 07/13] refactor: :recycle: check JSON path in post_init --- src/check_datapackage/check.py | 4 ++-- src/check_datapackage/custom_check.py | 30 +++++++++++++-------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index e9c47b5f..d01a7f55 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -7,7 +7,7 @@ from check_datapackage.config import Config from check_datapackage.constants import DATA_PACKAGE_SCHEMA_PATH, GROUP_ERRORS -from check_datapackage.custom_check import apply_custom_checks +from check_datapackage.custom_check import apply_checks from check_datapackage.exclusion import exclude from check_datapackage.internals import ( _filter, @@ -44,7 +44,7 @@ class for more details, especially about the default values. _set_should_fields_to_required(schema) issues = _check_object_against_json_schema(properties, schema) - issues += apply_custom_checks(config.custom_checks, properties) + issues += apply_checks(config.custom_checks, properties) issues = exclude(issues, config.exclusions, properties) return sorted(set(issues)) diff --git a/src/check_datapackage/custom_check.py b/src/check_datapackage/custom_check.py index b05dd1f2..6040c4fc 100644 --- a/src/check_datapackage/custom_check.py +++ b/src/check_datapackage/custom_check.py @@ -82,8 +82,9 @@ def apply(self, descriptor: dict[str, Any]) -> list[Issue]: ) +@dataclass(frozen=True) class RequiredCheck: - """A custom check that checks that a field is present (i.e. not None). + """A check that checks if a field is present (i.e. not None). Attributes: jsonpath (str): The location of the field or fields, expressed in [JSON @@ -100,24 +101,23 @@ class RequiredCheck: ) ``` """ + jsonpath: str + message: str + _field_name: str = field(init=False, repr=False) - _field_name: str - def __init__(self, jsonpath: str, message: str): - """Initializes the `RequiredCheck`.""" - field_name_match = re.search(r"(? None: + """Checks that `RequiredCheck`s have sensible `jsonpath`s.""" + field_name_match = re.search(r"(? list[Issue]: @@ -146,19 +146,19 @@ def apply(self, descriptor: dict[str, Any]) -> list[Issue]: ) -def apply_custom_checks( - custom_checks: Sequence[SupportsApply], descriptor: dict[str, Any] +def apply_checks( + checks: Sequence[SupportsApply], descriptor: dict[str, Any] ) -> list[Issue]: - """Checks the descriptor for all custom checks and creates issues if any fail. + """Checks the descriptor for all user-defined checks and creates issues if any fail. Args: - custom_checks: The custom checks to apply to the descriptor. + checks: The user-defined checks to apply to the descriptor. descriptor: The descriptor to check. Returns: A list of `Issue`s. """ return _flat_map( - custom_checks, + checks, lambda check: check.apply(descriptor), ) From f7e949c9eae37f47236522e22f3082e75abdf4cc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Oct 2025 09:05:04 +0000 Subject: [PATCH 08/13] chore(pre-commit): :pencil2: automatic fixes --- src/check_datapackage/custom_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/check_datapackage/custom_check.py b/src/check_datapackage/custom_check.py index 6040c4fc..76d46f46 100644 --- a/src/check_datapackage/custom_check.py +++ b/src/check_datapackage/custom_check.py @@ -118,7 +118,7 @@ def __post_init__(self) -> None: " (e.g., `$.resources[0]`) are not allowed." ) super().__setattr__("_field_name", field_name_match.group(1)) - + def apply(self, descriptor: dict[str, Any]) -> list[Issue]: """Checks the descriptor against this check and creates issues on failure. From 37412b3bf3f5a67c67df97d732b90e8a9c407f63 Mon Sep 17 00:00:00 2001 From: Marton Vago Date: Tue, 28 Oct 2025 09:15:47 +0000 Subject: [PATCH 09/13] feat: :sparkles: disallow type required CustomChecks --- src/check_datapackage/custom_check.py | 12 ++++++++++-- tests/test_custom_check.py | 10 ++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/check_datapackage/custom_check.py b/src/check_datapackage/custom_check.py index 76d46f46..a1719305 100644 --- a/src/check_datapackage/custom_check.py +++ b/src/check_datapackage/custom_check.py @@ -14,6 +14,7 @@ class SupportsApply(Protocol): """A protocol for classes implementing an `apply()` method.""" + def apply(self, descriptor: dict[str, Any]) -> list[Issue]: """Applies the check to the descriptor and creates issues on failure. @@ -61,6 +62,14 @@ class CustomCheck: check: Callable[[Any], bool] type: str = "custom" + def __post_init__(self) -> None: + """Checks that `CustomCheck`s don't have their type set to `required`.""" + if self.type == "required": + raise ValueError( + "Cannot define `CustomCheck` with `type='required'`." + " Use `RequiredCheck` to mark fields as required instead." + ) + def apply(self, descriptor: dict[str, Any]) -> list[Issue]: """Checks the descriptor against this check and creates issues on failure. @@ -101,11 +110,11 @@ class RequiredCheck: ) ``` """ + jsonpath: str message: str _field_name: str = field(init=False, repr=False) - def __post_init__(self) -> None: """Checks that `RequiredCheck`s have sensible `jsonpath`s.""" field_name_match = re.search(r"(? None: ) super().__setattr__("_field_name", field_name_match.group(1)) - def apply(self, descriptor: dict[str, Any]) -> list[Issue]: """Checks the descriptor against this check and creates issues on failure. diff --git a/tests/test_custom_check.py b/tests/test_custom_check.py index 4321dc01..407c1667 100644 --- a/tests/test_custom_check.py +++ b/tests/test_custom_check.py @@ -177,3 +177,13 @@ def test_required_check_cannot_apply_to_ambiguous_path(jsonpath): jsonpath=jsonpath, message="This should fail.", ) + + +def test_custom_check_cannot_be_type_required(): + with raises(ValueError): + CustomCheck( + jsonpath="$.name", + message="A message.", + check=lambda _: True, + type="required", + ) From a2c60d0488fdacae00b140dc0462366a96b8b5c3 Mon Sep 17 00:00:00 2001 From: martonvago Date: Tue, 28 Oct 2025 10:47:07 +0000 Subject: [PATCH 10/13] refactor: :recycle: rename to properties --- src/check_datapackage/custom_check.py | 36 +++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/check_datapackage/custom_check.py b/src/check_datapackage/custom_check.py index a1719305..0297378a 100644 --- a/src/check_datapackage/custom_check.py +++ b/src/check_datapackage/custom_check.py @@ -15,11 +15,11 @@ class SupportsApply(Protocol): """A protocol for classes implementing an `apply()` method.""" - def apply(self, descriptor: dict[str, Any]) -> list[Issue]: - """Applies the check to the descriptor and creates issues on failure. + def apply(self, properties: dict[str, Any]) -> list[Issue]: + """Applies the check to the properties and creates issues on failure. Args: - descriptor: The descriptor to check. + properties: The properties to check. Returns: A list of `Issue`s. @@ -29,7 +29,7 @@ def apply(self, descriptor: dict[str, Any]) -> list[Issue]: @dataclass(frozen=True) class CustomCheck: - """A custom check to be done on a Data Package descriptor. + """A custom check to be done on Data Package metadata. Attributes: jsonpath (str): The location of the field or fields the custom check applies to, @@ -70,16 +70,16 @@ def __post_init__(self) -> None: " Use `RequiredCheck` to mark fields as required instead." ) - def apply(self, descriptor: dict[str, Any]) -> list[Issue]: - """Checks the descriptor against this check and creates issues on failure. + def apply(self, properties: dict[str, Any]) -> list[Issue]: + """Checks the properties against this check and creates issues on failure. Args: - descriptor: The descriptor to check. + properties: The properties to check. Returns: A list of `Issue`s. """ - matching_fields = _get_fields_at_jsonpath(self.jsonpath, descriptor) + matching_fields = _get_fields_at_jsonpath(self.jsonpath, properties) failed_fields = _filter( matching_fields, lambda field: not self.check(field.value) ) @@ -128,18 +128,18 @@ def __post_init__(self) -> None: ) super().__setattr__("_field_name", field_name_match.group(1)) - def apply(self, descriptor: dict[str, Any]) -> list[Issue]: - """Checks the descriptor against this check and creates issues on failure. + def apply(self, properties: dict[str, Any]) -> list[Issue]: + """Checks the properties against this check and creates issues on failure. Args: - descriptor: The descriptor to check. + properties: The properties to check. Returns: A list of `Issue`s. """ - matching_paths = _get_direct_jsonpaths(self.jsonpath, descriptor) + matching_paths = _get_direct_jsonpaths(self.jsonpath, properties) indirect_parent_path = self.jsonpath.removesuffix(self._field_name) - direct_parent_paths = _get_direct_jsonpaths(indirect_parent_path, descriptor) + direct_parent_paths = _get_direct_jsonpaths(indirect_parent_path, properties) missing_paths = _filter( direct_parent_paths, lambda path: f"{path}{self._field_name}" not in matching_paths, @@ -155,18 +155,18 @@ def apply(self, descriptor: dict[str, Any]) -> list[Issue]: def apply_checks( - checks: Sequence[SupportsApply], descriptor: dict[str, Any] + checks: Sequence[SupportsApply], properties: dict[str, Any] ) -> list[Issue]: - """Checks the descriptor for all user-defined checks and creates issues if any fail. + """Checks the properties for all user-defined checks and creates issues if any fail. Args: - checks: The user-defined checks to apply to the descriptor. - descriptor: The descriptor to check. + checks: The user-defined checks to apply to the properties. + properties: The properties to check. Returns: A list of `Issue`s. """ return _flat_map( checks, - lambda check: check.apply(descriptor), + lambda check: check.apply(properties), ) From b93c5f48a218a6fa159969b9f4b3af6350eb3bd4 Mon Sep 17 00:00:00 2001 From: martonvago Date: Tue, 28 Oct 2025 10:56:42 +0000 Subject: [PATCH 11/13] refactor: :recycle: remove protocol --- src/check_datapackage/config.py | 14 +++++++++----- src/check_datapackage/custom_check.py | 19 ++----------------- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/src/check_datapackage/config.py b/src/check_datapackage/config.py index 16a4e8d8..99b77850 100644 --- a/src/check_datapackage/config.py +++ b/src/check_datapackage/config.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from typing import Literal -from check_datapackage.custom_check import SupportsApply +from check_datapackage.custom_check import CustomCheck, RequiredCheck from check_datapackage.exclusion import Exclusion @@ -12,8 +12,8 @@ class Config: Attributes: exclusions (list[Exclusion]): Any issues matching any of Exclusion objects will be excluded (i.e., removed from the output of the check function). - custom_checks (list[CustomCheck]): Custom checks listed here will be done in - addition to checks defined in the Data Package standard. + custom_checks (list[CustomCheck | RequiredCheck]): Custom checks listed here + will be done in addition to checks defined in the Data Package standard. strict (bool): Whether to include "SHOULD" checks in addition to "MUST" checks from the Data Package standard. If True, "SHOULD" checks will also be included. Defaults to False. @@ -31,14 +31,18 @@ class Config: message="Data Packages may only be licensed under MIT.", check=lambda license_name: license_name == "mit", ) + required_title_check = cdp.RequiredCheck( + jsonpath="$.title", + message="A title is required.", + ) config = cdp.Config( exclusions=[exclusion_required], - custom_checks=[license_check], + custom_checks=[license_check, required_title_check], ) ``` """ exclusions: list[Exclusion] = field(default_factory=list) - custom_checks: list[SupportsApply] = field(default_factory=list) + custom_checks: list[CustomCheck | RequiredCheck] = field(default_factory=list) strict: bool = False version: Literal["v1", "v2"] = "v2" diff --git a/src/check_datapackage/custom_check.py b/src/check_datapackage/custom_check.py index 0297378a..c35305a2 100644 --- a/src/check_datapackage/custom_check.py +++ b/src/check_datapackage/custom_check.py @@ -1,6 +1,6 @@ import re from dataclasses import dataclass, field -from typing import Any, Callable, Protocol, Sequence +from typing import Any, Callable from check_datapackage.internals import ( _filter, @@ -12,21 +12,6 @@ from check_datapackage.issue import Issue -class SupportsApply(Protocol): - """A protocol for classes implementing an `apply()` method.""" - - def apply(self, properties: dict[str, Any]) -> list[Issue]: - """Applies the check to the properties and creates issues on failure. - - Args: - properties: The properties to check. - - Returns: - A list of `Issue`s. - """ - ... - - @dataclass(frozen=True) class CustomCheck: """A custom check to be done on Data Package metadata. @@ -155,7 +140,7 @@ def apply(self, properties: dict[str, Any]) -> list[Issue]: def apply_checks( - checks: Sequence[SupportsApply], properties: dict[str, Any] + checks: list[CustomCheck | RequiredCheck], properties: dict[str, Any] ) -> list[Issue]: """Checks the properties for all user-defined checks and creates issues if any fail. From 016b046ae8d91b5c1226d6048e6db3effbbe4c5c Mon Sep 17 00:00:00 2001 From: martonvago Date: Wed, 29 Oct 2025 10:07:21 +0000 Subject: [PATCH 12/13] refactor: :fire: remove init checks --- src/check_datapackage/custom_check.py | 36 ++++++++------------------- tests/test_custom_check.py | 32 ------------------------ 2 files changed, 10 insertions(+), 58 deletions(-) diff --git a/src/check_datapackage/custom_check.py b/src/check_datapackage/custom_check.py index c35305a2..f4d2ef19 100644 --- a/src/check_datapackage/custom_check.py +++ b/src/check_datapackage/custom_check.py @@ -1,5 +1,5 @@ import re -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Any, Callable from check_datapackage.internals import ( @@ -47,14 +47,6 @@ class CustomCheck: check: Callable[[Any], bool] type: str = "custom" - def __post_init__(self) -> None: - """Checks that `CustomCheck`s don't have their type set to `required`.""" - if self.type == "required": - raise ValueError( - "Cannot define `CustomCheck` with `type='required'`." - " Use `RequiredCheck` to mark fields as required instead." - ) - def apply(self, properties: dict[str, Any]) -> list[Issue]: """Checks the properties against this check and creates issues on failure. @@ -98,20 +90,6 @@ class RequiredCheck: jsonpath: str message: str - _field_name: str = field(init=False, repr=False) - - def __post_init__(self) -> None: - """Checks that `RequiredCheck`s have sensible `jsonpath`s.""" - field_name_match = re.search(r"(? list[Issue]: """Checks the properties against this check and creates issues on failure. @@ -122,17 +100,23 @@ def apply(self, properties: dict[str, Any]) -> list[Issue]: Returns: A list of `Issue`s. """ + # TODO: check jsonpath when checking other user input + field_name_match = re.search(r"(? Date: Wed, 29 Oct 2025 10:19:38 +0000 Subject: [PATCH 13/13] refactor: :recycle: review markups --- src/check_datapackage/check.py | 4 ++-- src/check_datapackage/custom_check.py | 33 ++++++++++++++++----------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index d01a7f55..9a888ac8 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -7,7 +7,7 @@ from check_datapackage.config import Config from check_datapackage.constants import DATA_PACKAGE_SCHEMA_PATH, GROUP_ERRORS -from check_datapackage.custom_check import apply_checks +from check_datapackage.custom_check import apply_extensions from check_datapackage.exclusion import exclude from check_datapackage.internals import ( _filter, @@ -44,7 +44,7 @@ class for more details, especially about the default values. _set_should_fields_to_required(schema) issues = _check_object_against_json_schema(properties, schema) - issues += apply_checks(config.custom_checks, properties) + issues += apply_extensions(properties, config.custom_checks) issues = exclude(issues, config.exclusions, properties) return sorted(set(issues)) diff --git a/src/check_datapackage/custom_check.py b/src/check_datapackage/custom_check.py index f4d2ef19..564e6e15 100644 --- a/src/check_datapackage/custom_check.py +++ b/src/check_datapackage/custom_check.py @@ -3,6 +3,7 @@ from typing import Any, Callable from check_datapackage.internals import ( + DescriptorField, _filter, _flat_map, _get_direct_jsonpaths, @@ -48,7 +49,7 @@ class CustomCheck: type: str = "custom" def apply(self, properties: dict[str, Any]) -> list[Issue]: - """Checks the properties against this check and creates issues on failure. + """Applies the custom check to the properties. Args: properties: The properties to check. @@ -56,12 +57,16 @@ def apply(self, properties: dict[str, Any]) -> list[Issue]: Returns: A list of `Issue`s. """ - matching_fields = _get_fields_at_jsonpath(self.jsonpath, properties) - failed_fields = _filter( - matching_fields, lambda field: not self.check(field.value) + fields: list[DescriptorField] = _get_fields_at_jsonpath( + self.jsonpath, + properties, + ) + matches: list[DescriptorField] = _filter( + fields, + lambda field: not self.check(field.value), ) return _map( - failed_fields, + matches, lambda field: Issue( jsonpath=field.jsonpath, type=self.type, message=self.message ), @@ -70,7 +75,7 @@ def apply(self, properties: dict[str, Any]) -> list[Issue]: @dataclass(frozen=True) class RequiredCheck: - """A check that checks if a field is present (i.e. not None). + """Set a specific property as required. Attributes: jsonpath (str): The location of the field or fields, expressed in [JSON @@ -92,7 +97,7 @@ class RequiredCheck: message: str def apply(self, properties: dict[str, Any]) -> list[Issue]: - """Checks the properties against this check and creates issues on failure. + """Applies the required check to the properties. Args: properties: The properties to check. @@ -123,19 +128,21 @@ def apply(self, properties: dict[str, Any]) -> list[Issue]: ) -def apply_checks( - checks: list[CustomCheck | RequiredCheck], properties: dict[str, Any] +def apply_extensions( + properties: dict[str, Any], + # TODO: extensions: Extensions once Extensions implemented + extensions: list[CustomCheck | RequiredCheck], ) -> list[Issue]: - """Checks the properties for all user-defined checks and creates issues if any fail. + """Applies the extension checks to the properties. Args: - checks: The user-defined checks to apply to the properties. properties: The properties to check. + extensions: The user-defined extensions to apply to the properties. Returns: A list of `Issue`s. """ return _flat_map( - checks, - lambda check: check.apply(properties), + extensions, + lambda extension: extension.apply(properties), )