Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
20ced70
feat: :sparkles: add RequiredRule
martonvago Oct 13, 2025
520f04a
refactor: :recycle: simplify apply logic
martonvago Oct 14, 2025
3b8a7c3
fix: :bug: remove unused import
martonvago Oct 14, 2025
e0f3ac9
fix: :bug: disallow all ambiguous JSON paths
martonvago Oct 14, 2025
7a1b03d
Merge branch 'main' into feat/required-rule
martonvago Oct 14, 2025
4ecfd81
Merge branch 'main' into feat/required-rule
martonvago Oct 15, 2025
4a729b5
Merge branch 'main' into feat/required-rule
martonvago Oct 17, 2025
11d80d4
Merge branch 'main' into feat/required-rule
martonvago Oct 17, 2025
65ce0db
Merge branch 'main' into feat/required-rule
martonvago Oct 27, 2025
3cb33cf
refactor: :recycle: make RequiredCheck not inherit from CustomCheck
martonvago Oct 28, 2025
160d764
chore(pre-commit): :pencil2: automatic fixes
pre-commit-ci[bot] Oct 28, 2025
7270e9b
refactor: :recycle: check JSON path in post_init
martonvago Oct 28, 2025
639b9d9
Merge branch 'feat/required-rule' of github.com:seedcase-project/chec…
martonvago Oct 28, 2025
f7e949c
chore(pre-commit): :pencil2: automatic fixes
pre-commit-ci[bot] Oct 28, 2025
37412b3
feat: :sparkles: disallow type required CustomChecks
martonvago Oct 28, 2025
a2c60d0
refactor: :recycle: rename to properties
martonvago Oct 28, 2025
b93c5f4
refactor: :recycle: remove protocol
martonvago Oct 28, 2025
0195941
Merge branch 'main' into feat/required-rule
martonvago Oct 29, 2025
016b046
refactor: :fire: remove init checks
martonvago Oct 29, 2025
70af3f4
refactor: :recycle: review markups
martonvago Oct 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions _quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ quartodoc:
- name: read_json
- name: Config
- name: CustomCheck
- name: RequiredCheck
- name: Exclude
- name: Issue
- name: example_package_properties
Expand Down
3 changes: 2 additions & 1 deletion src/check_datapackage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from .check import check
from .config import Config
from .custom_check import CustomCheck
from .custom_check import CustomCheck, RequiredCheck
from .examples import example_package_properties, example_resource_properties
from .exclude import Exclude
from .issue import Issue
Expand All @@ -13,6 +13,7 @@
"Exclude",
"Issue",
"CustomCheck",
"RequiredCheck",
"example_package_properties",
"example_resource_properties",
"check",
Expand Down
122 changes: 91 additions & 31 deletions src/check_datapackage/custom_check.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import re
from dataclasses import dataclass
from typing import Any, Callable

from check_datapackage.internals import (
_filter,
_flat_map,
_get_direct_jsonpaths,
_get_fields_at_jsonpath,
_map,
)
from check_datapackage.issue import Issue


@dataclass
@dataclass(frozen=True)
class CustomCheck:
"""A custom check to be done on a Data Package descriptor.

Expand Down Expand Up @@ -44,6 +46,93 @@ class CustomCheck:
check: Callable[[Any], bool]
type: str = "custom"

def apply(self, descriptor: dict[str, Any]) -> list[Issue]:
    """Runs this check on the descriptor and reports the fields that fail it.

    Args:
        descriptor: The descriptor to check.

    Returns:
        A list of `Issue`s.
    """

    def _fails_check(field: Any) -> bool:
        # A field is reported when the user-supplied predicate rejects its value.
        return not self.check(field.value)

    def _to_issue(field: Any) -> Issue:
        # The issue points at the concrete path of the failing field.
        return Issue(jsonpath=field.jsonpath, type=self.type, message=self.message)

    candidates = _get_fields_at_jsonpath(self.jsonpath, descriptor)
    return _map(_filter(candidates, _fails_check), _to_issue)


class RequiredCheck(CustomCheck):
    """A custom check that checks that a field is present (i.e. not None).

    Attributes:
        jsonpath (str): The location of the field or fields, expressed in [JSON
            path](https://jg-rp.github.io/python-jsonpath/syntax/) notation, to which
            the check applies (e.g., `$.resources[*].name`).
        message (str): The message that is shown when the check fails.

    Examples:
        ```{python}
        import check_datapackage as cdp
        required_title_check = cdp.RequiredCheck(
            jsonpath="$.title",
            message="A title is required.",
        )
        ```
    """

    # The trailing field name of `jsonpath`, including its leading dot
    # (e.g. `.name` for `$.resources[*].name`).
    _field_name: str

    def __init__(self, jsonpath: str, message: str):
        """Initializes the `RequiredCheck`.

        Args:
            jsonpath: The JSON path of the required field or fields. It must end
                in a named field reached with a single dot.
            message: The message that is shown when the check fails.

        Raises:
            ValueError: If `jsonpath` does not end in a named field reached with
                a single dot (e.g., `$..title`, `$.resources[0]`, `$.*`).
        """
        # Find a trailing field name that is not preceded by `..`.
        # So, e.g., `$.resources[*].name` >> `.name`.
        field_name_match = re.search(r"(?<!\.)(\.\w+)$", jsonpath)
        if not field_name_match:
            raise ValueError(
                f"Cannot define `RequiredCheck` for JSON path `{jsonpath}`."
                " A `RequiredCheck` must target a concrete object field (e.g.,"
                " `$.title`) or set of fields (e.g., `$.resources[*].title`)."
                " Ambiguous paths (e.g., `$..title`) or paths pointing to array items"
                " (e.g., `$.resources[0]`) are not allowed."
            )

        self._field_name = field_name_match.group(1)
        super().__init__(
            jsonpath=jsonpath,
            message=message,
            # Passed to satisfy the base-class constructor; the `apply` override
            # below does not call it and compares paths instead.
            check=lambda value: value is not None,
            type="required",
        )

    def apply(self, descriptor: dict[str, Any]) -> list[Issue]:
        """Checks the descriptor against this check and creates issues on failure.

        A field is reported as missing when its parent path matches in the
        descriptor but the parent path plus the field name is not among the
        paths matched by `jsonpath`.

        Args:
            descriptor: The descriptor to check.

        Returns:
            A list of `Issue`s.
        """
        # Build the set once so each parent lookup below is a hash lookup
        # instead of a scan over every matching path.
        matching_paths = set(_get_direct_jsonpaths(self.jsonpath, descriptor))
        indirect_parent_path = self.jsonpath.removesuffix(self._field_name)
        direct_parent_paths = _get_direct_jsonpaths(indirect_parent_path, descriptor)
        missing_paths = _filter(
            direct_parent_paths,
            lambda path: f"{path}{self._field_name}" not in matching_paths,
        )
        return _map(
            missing_paths,
            lambda path: Issue(
                jsonpath=path + self._field_name,
                type=self.type,
                message=self.message,
            ),
        )


def apply_custom_checks(
custom_checks: list[CustomCheck], descriptor: dict[str, Any]
Expand All @@ -59,34 +148,5 @@ def apply_custom_checks(
"""
return _flat_map(
custom_checks,
lambda custom_check: _apply_custom_check(custom_check, descriptor),
)


def _apply_custom_check(
custom_check: CustomCheck, descriptor: dict[str, Any]
) -> list[Issue]:
"""Applies the custom check to the descriptor.

If any fields fail the custom check, this function creates a list of issues
for those fields.

Args:
custom_check: The custom check to apply to the descriptor.
descriptor: The descriptor to check.

Returns:
A list of `Issue`s.
"""
matching_fields = _get_fields_at_jsonpath(custom_check.jsonpath, descriptor)
failed_fields = _filter(
matching_fields, lambda field: not custom_check.check(field.value)
)
return _map(
failed_fields,
lambda field: Issue(
jsonpath=field.jsonpath,
type=custom_check.type,
message=custom_check.message,
),
lambda check: check.apply(descriptor),
)
6 changes: 6 additions & 0 deletions src/check_datapackage/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ def _get_fields_at_jsonpath(
return _map(matches, _create_descriptor_field)


def _get_direct_jsonpaths(jsonpath: str, descriptor: dict[str, Any]) -> list[str]:
    """Returns all direct JSON paths that match a direct or indirect JSON path."""
    return [
        field.jsonpath for field in _get_fields_at_jsonpath(jsonpath, descriptor)
    ]


def _create_descriptor_field(match: JSONPathMatch) -> DescriptorField:
return DescriptorField(
jsonpath=match.path.replace("['", ".").replace("']", ""),
Expand Down
92 changes: 86 additions & 6 deletions tests/test_custom_check.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from pytest import mark, raises

from check_datapackage.check import check
from check_datapackage.config import Config
from check_datapackage.custom_check import CustomCheck
from check_datapackage.custom_check import CustomCheck, RequiredCheck
from check_datapackage.examples import (
example_package_properties,
example_resource_properties,
Expand Down Expand Up @@ -54,12 +56,24 @@ def test_indirect_jsonpath():


def test_multiple_custom_checks():
properties = example_package_properties()
properties["name"] = "ALLCAPS"
properties["resources"][0]["name"] = "not starting with woolly"
descriptor = example_package_properties()
descriptor["name"] = "ALLCAPS"
descriptor["resources"][0]["name"] = "not starting with woolly"
del descriptor["version"]

version_check = RequiredCheck(
jsonpath="$.version",
message="Version is required.",
)

config = Config(custom_checks=[lowercase_check, resource_name_check])
issues = check(properties, config=config)
config = Config(
custom_checks=[
lowercase_check,
resource_name_check,
version_check,
]
)
issues = check(descriptor, config=config)

assert issues == [
Issue(
Expand All @@ -72,6 +86,11 @@ def test_multiple_custom_checks():
type=resource_name_check.type,
message=resource_name_check.message,
),
Issue(
jsonpath=version_check.jsonpath,
type="required",
message=version_check.message,
),
]


Expand All @@ -97,3 +116,64 @@ def test_no_matching_jsonpath():
issues = check(properties, config=config)

assert issues == []


def test_required_check_wildcard():
    """A `$.*.id` required check on the example package yields eight issues."""
    config = Config(
        custom_checks=[
            RequiredCheck(
                jsonpath="$.*.id",
                message="All fields must have an id.",
            )
        ]
    )

    found = check(example_package_properties(), config=config)

    assert len(found) == 8


def test_required_check_array_wildcard():
    """Contributors lacking a `name` are reported at their concrete array index."""
    name_check = RequiredCheck(
        jsonpath="$.contributors[*].name",
        message="Contributor name is required.",
    )
    descriptor = example_package_properties()
    # The first two contributors have no name; the third one does.
    descriptor["contributors"] = [
        {"path": "a/path"},
        {"path": "a/path"},
        {"path": "a/path", "name": "a name"},
    ]

    issues = check(descriptor, config=Config(custom_checks=[name_check]))

    expected = [
        Issue(
            jsonpath=f"$.contributors[{index}].name",
            type=name_check.type,
            message=name_check.message,
        )
        for index in (0, 1)
    ]
    assert issues == expected


@mark.parametrize(
    "jsonpath",
    [
        "$",
        "..*",
        "created",
        "$..path",
        "..resources",
        "$.resources[0].*",
        "$.resources[*]",
    ],
)
def test_required_check_cannot_apply_to_ambiguous_path(jsonpath):
    """Constructing a `RequiredCheck` with any of these paths raises `ValueError`."""
    failing_message = "This should fail."

    with raises(ValueError):
        RequiredCheck(jsonpath=jsonpath, message=failing_message)
Loading