Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 49 additions & 2 deletions src/check_datapackage/custom_check.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
from dataclasses import dataclass
import re
from dataclasses import dataclass, field
from typing import Any, Callable

from check_datapackage.internals import (
DescriptorField,
_filter,
_flat_map,
_get_direct_jsonpaths,
_get_fields_at_jsonpath,
_map,
)
from check_datapackage.issue import Issue


@dataclass
@dataclass(frozen=True)
class CustomCheck:
"""A custom check to be done on a Data Package descriptor.

Expand All @@ -25,6 +28,9 @@ class CustomCheck:
type (str): An identifier for the custom check. It will be shown in error
messages and can be used to exclude the check. Each custom check
should have a unique `type`.
check_missing (bool): Whether fields that would match the JSON path but are
missing from the object should be passed to `check` as `None`.
Defaults to False.

Examples:
```{python}
Expand All @@ -43,6 +49,23 @@ class CustomCheck:
message: str
check: Callable[[Any], bool]
type: str = "custom"
check_missing: bool = False
_field_name: str = field(init=False, repr=False)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not exposing this in the constructor


def __post_init__(self) -> None:
    """Validate `jsonpath` and record the field name it targets.

    When `check_missing` is true, `jsonpath` must end in exactly one concrete
    field name (e.g., `$.title` or `$.resources[*].title`). The trailing
    `.name` segment is stored in `_field_name` so the missing-field lookup
    can strip it from the path later.

    `_field_name` is always assigned, to an empty string when
    `check_missing` is false. Otherwise the attribute would not exist on
    such instances, and the dataclass-generated `__eq__` and `__hash__`,
    which read every field, would raise `AttributeError`.

    Raises:
        ValueError: If `check_missing` is true and `jsonpath` does not end
            in a single concrete field name.
    """
    field_name = ""
    if self.check_missing:
        # The negative lookbehind rejects recursive descent (`$..title`);
        # `\w+` rejects wildcards and array subscripts as the last segment.
        field_name_match = re.search(r"(?<!\.)(\.\w+)$", self.jsonpath)
        if not field_name_match:
            raise ValueError(
                f"Cannot define `CustomCheck` for JSON path `{self.jsonpath}`."
                " A check with `check_missing` set to true must target a concrete "
                "object field (e.g., `$.title`) or set of fields (e.g., "
                "`$.resources[*].title`). Ambiguous paths (e.g., `$..title`) or "
                "paths pointing to array items (e.g., `$.resources[0]`) are not"
                " allowed."
            )
        field_name = field_name_match.group(1)
    # Go through the parent's `__setattr__` so the assignment also works when
    # the dataclass is frozen.
    super().__setattr__("_field_name", field_name)


def apply_custom_checks(
Expand Down Expand Up @@ -79,6 +102,11 @@ def _apply_custom_check(
A list of `Issue`s.
"""
matching_fields = _get_fields_at_jsonpath(custom_check.jsonpath, descriptor)
if custom_check.check_missing:
matching_fields += _get_missing_fields(
custom_check, descriptor, matching_fields
)

failed_fields = _filter(
matching_fields, lambda field: not custom_check.check(field.value)
)
Expand All @@ -90,3 +118,22 @@ def _apply_custom_check(
message=custom_check.message,
),
)


def _get_missing_fields(
    check: CustomCheck,
    descriptor: dict[str, Any],
    matching_fields: list[DescriptorField],
) -> list[DescriptorField]:
    """Returns the missing fields that the check would apply to if they were present.

    The parent JSON path (the check's `jsonpath` without its trailing field
    name) is resolved to direct paths in the descriptor. The field name is
    then appended to each of them, and every resulting path that is not
    among `matching_fields` is reported as a field whose value is `None`.

    Args:
        check: The custom check whose `jsonpath` and `_field_name` are used.
        descriptor: The descriptor in which parent objects are looked up.
        matching_fields: The fields already found at the check's `jsonpath`.

    Returns:
        One `DescriptorField` with `value=None` per missing field.
    """
    parent_jsonpath = check.jsonpath.removesuffix(check._field_name)
    potentially_matching_paths = _map(
        _get_direct_jsonpaths(parent_jsonpath, descriptor),
        lambda path: f"{path}{check._field_name}",
    )
    # Build the lookup once as a set, so each membership test below is O(1)
    # instead of a scan over the whole list of matched paths.
    actually_matching_paths = set(
        _map(matching_fields, lambda field: field.jsonpath)
    )
    missing_paths = _filter(
        potentially_matching_paths,
        lambda path: path not in actually_matching_paths,
    )
    return _map(missing_paths, lambda path: DescriptorField(jsonpath=path, value=None))
6 changes: 6 additions & 0 deletions src/check_datapackage/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ def _get_fields_at_jsonpath(
return _map(matches, _create_descriptor_field)


def _get_direct_jsonpaths(jsonpath: str, descriptor: dict[str, Any]) -> list[str]:
    """Returns the JSON path of every field in `descriptor` matched by `jsonpath`.

    The given path may be direct or indirect; each returned path is the
    `jsonpath` attribute of one matched field.
    """
    return _map(
        _get_fields_at_jsonpath(jsonpath, descriptor),
        lambda matched_field: matched_field.jsonpath,
    )


def _create_descriptor_field(match: JSONPathMatch) -> DescriptorField:
return DescriptorField(
jsonpath=match.path.replace("['", ".").replace("']", ""),
Expand Down
123 changes: 118 additions & 5 deletions tests/test_custom_check.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from typing import Any

from pytest import mark, raises

from check_datapackage.check import check
from check_datapackage.config import Config
from check_datapackage.custom_check import CustomCheck
Expand All @@ -21,6 +25,10 @@
)


def must_not_be_null(value: Any) -> bool:
    """Return True for every value except `None`."""
    if value is None:
        return False
    return True


def test_direct_jsonpath():
properties = example_package_properties()
properties["name"] = "ALLCAPS"
Expand Down Expand Up @@ -54,12 +62,27 @@ def test_indirect_jsonpath():


def test_multiple_custom_checks():
properties = example_package_properties()
properties["name"] = "ALLCAPS"
properties["resources"][0]["name"] = "not starting with woolly"
descriptor = example_package_properties()
descriptor["name"] = "ALLCAPS"
descriptor["resources"][0]["name"] = "not starting with woolly"
del descriptor["version"]

version_check = CustomCheck(
jsonpath="$.version",
message="Version is required.",
type="required",
check=must_not_be_null,
check_missing=True,
)

config = Config(custom_checks=[lowercase_check, resource_name_check])
issues = check(properties, config=config)
config = Config(
custom_checks=[
lowercase_check,
resource_name_check,
version_check,
]
)
issues = check(descriptor, config=config)

assert issues == [
Issue(
Expand All @@ -72,6 +95,11 @@ def test_multiple_custom_checks():
type=resource_name_check.type,
message=resource_name_check.message,
),
Issue(
jsonpath=version_check.jsonpath,
type="required",
message=version_check.message,
),
]


Expand All @@ -97,3 +125,88 @@ def test_no_matching_jsonpath():
issues = check(properties, config=config)

assert issues == []


def test_no_matching_jsonpath_with_check_missing():
    """An always-failing check on `$.missing` with `check_missing` gives one issue."""
    always_failing_check = CustomCheck(
        jsonpath="$.missing",
        message="This check always fails.",
        check=lambda value: False,
        type="always-fail",
        check_missing=True,
    )

    found_issues = check(
        example_package_properties(),
        config=Config(custom_checks=[always_failing_check]),
    )

    assert len(found_issues) == 1


def test_required_check_wildcard():
    """Requiring `id` at `$.*.id` on the example package gives eight issues."""
    required_id = CustomCheck(
        jsonpath="$.*.id",
        message="All fields must have an id.",
        type="required",
        check=must_not_be_null,
        check_missing=True,
    )
    package = example_package_properties()

    found_issues = check(package, config=Config(custom_checks=[required_id]))

    assert len(found_issues) == 8


def test_required_check_array_wildcard():
    """Contributors without a `name` are flagged; the one with a name is not."""
    name_check = CustomCheck(
        jsonpath="$.contributors[*].name",
        message="Contributor name is required.",
        type="required",
        check=must_not_be_null,
        check_missing=True,
    )
    descriptor = example_package_properties()
    descriptor["contributors"] = [
        {"path": "a/path"},
        {"path": "a/path"},
        {"path": "a/path", "name": "a name"},
    ]

    issues = check(descriptor, config=Config(custom_checks=[name_check]))

    # Only the first two contributors lack a `name`.
    expected_issues = [
        Issue(
            jsonpath=f"$.contributors[{index}].name",
            type=name_check.type,
            message=name_check.message,
        )
        for index in (0, 1)
    ]
    assert issues == expected_issues


@mark.parametrize(
    "jsonpath",
    [
        "$",
        "..*",
        "created",
        "$..path",
        "..resources",
        "$.resources[0].*",
        "$.resources[*]",
    ],
)
def test_required_check_cannot_apply_to_ambiguous_path(jsonpath):
    """Creating a `check_missing` check raises `ValueError` for each listed path."""
    check_arguments = {
        "jsonpath": jsonpath,
        "message": "This should fail.",
        "type": "required",
        "check": must_not_be_null,
        "check_missing": True,
    }

    with raises(ValueError):
        CustomCheck(**check_arguments)
Loading