Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion _quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ quartodoc:
- name: read_json
- name: Config
- name: CustomCheck
- name: Exclude
- name: Exclusion
- name: Issue
- name: example_package_properties
- name: example_resource_properties
Expand Down
14 changes: 8 additions & 6 deletions docs/design/interface.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,13 @@ function.

See the help documentation with `help(Config)` for more details.

### {{< var wip >}} `Exclude`
### {{< var wip >}} `Exclusion`

A subitem of `Config` for expressing checks to ignore.
A subitem of `Config` that expresses checks to exclude. This can be
useful if you want to exclude (or skip) certain checks from the Data
Package standard that are not relevant to your use case.

See the help documentation with `help(Exclude)` for more details.
See the help documentation with `help(Exclusion)` for more details.

#### {{< var wip >}} `CustomCheck`

Expand Down Expand Up @@ -164,7 +166,7 @@ flowchart TD

config[/Config/]
custom_check[/CustomCheck/]
exclude[/Exclude/]
exclusion[/Exclusion/]
check["check()"]
issues[/"list[Issue]"/]

Expand All @@ -173,8 +175,8 @@ flowchart TD

descriptor_file --> read_json --> descriptor
config_file --> read_config --> config
custom_check & exclude --> config
custom_check & exclude -.-> config_file
custom_check & exclusion --> config
custom_check & exclusion -.-> config_file

descriptor & config --> check --> issues --> explain --> messages
```
22 changes: 11 additions & 11 deletions docs/guide/config.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on the descriptor. The following configuration options are available:

- `version`: The version of Data Package standard to check against.
Defaults to `v2`.
- `exclude`: The list of checks to exclude.
- `exclusions`: A list of checks to exclude.
- `custom_checks`: The list of custom checks to run in addition to the
checks defined in the standard.
- `strict`: Whether to run recommended checks in addition to required
Expand All @@ -23,42 +23,42 @@ to.
The Data Package standard defines a range of check types (e.g.,
`required` or `pattern`) and it is also possible to create your own. For
example, to exclude checks flagging missing fields, you would exclude
the `required` check by defining an `Exclude` object with this `type`:
the `required` check by defining an `Exclusion` object with this `type`:

```{python}
from textwrap import dedent
import check_datapackage as cdp

exclude_required = cdp.Exclude(type="required")
exclusion_required = cdp.Exclusion(type="required")
```

To exclude checks of a specific field or fields, you can use a [JSON
path](https://en.wikipedia.org/wiki/JSONPath) in the `jsonpath`
attribute of an `Exclude` object. For example, you can exclude all
attribute of an `Exclusion` object. For example, you can exclude all
checks on the `name` field of the Data Package descriptor by writing:

```{python}
exclude_name = cdp.Exclude(jsonpath="$.name")
exclusion_name = cdp.Exclusion(jsonpath="$.name")
```

Or you can use the wildcard JSON path selector to exclude checks on the
`path` field of **all** Data Resource descriptors:

```{python}
exclude_path = cdp.Exclude(jsonpath="$.resources[*].path")
exclusion_path = cdp.Exclusion(jsonpath="$.resources[*].path")
```

The `type` and `jsonpath` arguments can also be combined:

```{python}
exclude_desc_required = cdp.Exclude(type="required", jsonpath="$.resources[*].description")
exclusion_desc_required = cdp.Exclusion(type="required", jsonpath="$.resources[*].description")
```

This would exclude required checks on the `description` field of Data
This will exclude required checks on the `description` field of Data
Resource descriptors.

To make the `check()` function aware of your exclusions, you add them to
the `Config` object passed to the function:
To apply your exclusions when running `check()`, add them to the
`Config` object passed to the function:

```{python}
package_descriptor = {
Expand All @@ -77,7 +77,7 @@ package_descriptor = {
],
}

config = cdp.Config(exclude=[exclude_required, exclude_name, exclude_path])
config = cdp.Config(exclusions=[exclusion_required, exclusion_name, exclusion_path])
cdp.check(descriptor=package_descriptor, config=config)
```

Expand Down
4 changes: 2 additions & 2 deletions src/check_datapackage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from .config import Config
from .custom_check import CustomCheck
from .examples import example_package_properties, example_resource_properties
from .exclude import Exclude
from .exclusion import Exclusion
from .issue import Issue
from .read_json import read_json

__all__ = [
"Config",
"Exclude",
"Exclusion",
"Issue",
"CustomCheck",
"example_package_properties",
Expand Down
4 changes: 2 additions & 2 deletions src/check_datapackage/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from check_datapackage.config import Config
from check_datapackage.constants import DATA_PACKAGE_SCHEMA_PATH, GROUP_ERRORS
from check_datapackage.custom_check import apply_custom_checks
from check_datapackage.exclude import exclude
from check_datapackage.exclusion import exclude
from check_datapackage.internals import (
_filter,
_flat_map,
Expand Down Expand Up @@ -44,7 +44,7 @@ class for more details, especially about the default values.

issues = _check_object_against_json_schema(descriptor, schema)
issues += apply_custom_checks(config.custom_checks, descriptor)
issues = exclude(issues, config.exclude, descriptor)
issues = exclude(issues, config.exclusions, descriptor)

return sorted(set(issues))

Expand Down
15 changes: 9 additions & 6 deletions src/check_datapackage/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
from typing import Literal

from check_datapackage.custom_check import CustomCheck
from check_datapackage.exclude import Exclude
from check_datapackage.exclusion import Exclusion


@dataclass
class Config:
"""Configuration for checking a Data Package descriptor.

Attributes:
exclude (list[Exclude]): Any issues matching any of these exclusions will be
ignored (i.e., removed from the output of the check function).
exclusions (list[Exclusion]): Any issues matching any of these Exclusion objects
will be excluded (i.e., removed from the output of the check function).
custom_checks (list[CustomCheck]): Custom checks listed here will be done in
addition to checks defined in the Data Package standard.
strict (bool): Whether to run recommended as well as required checks. If
Expand All @@ -23,18 +23,21 @@ class Config:
```{python}
import check_datapackage as cdp

exclude_required = cdp.Exclude(type="required")
exclusion_required = cdp.Exclusion(type="required")
license_check = cdp.CustomCheck(
type="only-mit",
jsonpath="$.licenses[*].name",
message="Data Packages may only be licensed under MIT.",
check=lambda license_name: license_name == "mit",
)
config = cdp.Config(exclude=[exclude_required], custom_checks=[license_check])
config = cdp.Config(
exclusions=[exclusion_required],
custom_checks=[license_check],
)
```
"""

exclude: list[Exclude] = field(default_factory=list)
exclusions: list[Exclusion] = field(default_factory=list)
custom_checks: list[CustomCheck] = field(default_factory=list)
strict: bool = False
version: Literal["v1", "v2"] = "v2"
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@


@dataclass
class Exclude:
r"""Exclude issues when checking a Data Package descriptor.
class Exclusion:
r"""A check to be excluded when checking properties.

When you use both `jsonpath` and `type` in the same `Exclude`, only issues that
match *both* will be excluded, meaning it is an `AND` logic. If you want `OR` logic,
use multiple `Exclude` objects in the `Config`.
When you use both `jsonpath` and `type` in the same `Exclusion` object, only issues
that match *both* will be excluded (i.e., `AND` logic). If you want `OR` logic, use
multiple `Exclusion` objects in the `Config`.

Attributes:
jsonpath (Optional[str]): [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
Expand All @@ -30,9 +30,9 @@ class Exclude:
```{python}
import check_datapackage as cdp

exclude_required = cdp.Exclude(type="required")
exclude_name = cdp.Exclude(jsonpath="$.name")
exclude_desc_required = cdp.Exclude(
exclusion_required = cdp.Exclusion(type="required")
exclusion_name = cdp.Exclusion(jsonpath="$.name")
exclusion_desc_required = cdp.Exclusion(
type="required",
jsonpath="$.resources[*].description"
)
Expand All @@ -44,36 +44,38 @@ class Exclude:


def exclude(
issues: list[Issue], excludes: list[Exclude], descriptor: dict[str, Any]
issues: list[Issue], exclusions: list[Exclusion], descriptor: dict[str, Any]
) -> list[Issue]:
"""Exclude issues based on the provided configuration settings."""
"""Exclude issues defined by Exclusion objects."""
return _filter(
issues,
lambda issue: not _get_any_matches(issue, excludes, descriptor),
lambda issue: not _get_any_matches(issue, exclusions, descriptor),
)


def _get_any_matches(
issue: Issue, excludes: list[Exclude], descriptor: dict[str, Any]
issue: Issue, exclusions: list[Exclusion], descriptor: dict[str, Any]
) -> bool:
matches: list[bool] = _map(
excludes, lambda exclude: _get_matches(issue, exclude, descriptor)
exclusions, lambda exclusion: _get_matches(issue, exclusion, descriptor)
)
return any(matches)


def _get_matches(issue: Issue, exclude: Exclude, descriptor: dict[str, Any]) -> bool:
def _get_matches(
issue: Issue, exclusion: Exclusion, descriptor: dict[str, Any]
) -> bool:
matches: list[bool] = []

both_none = exclude.jsonpath is None and exclude.type is None
both_none = exclusion.jsonpath is None and exclusion.type is None
if both_none:
return False

if exclude.jsonpath:
matches.append(_same_jsonpath(issue, exclude.jsonpath, descriptor))
if exclusion.jsonpath:
matches.append(_same_jsonpath(issue, exclusion.jsonpath, descriptor))

if exclude.type:
matches.append(_same_type(issue, exclude.type))
if exclusion.type:
matches.append(_same_type(issue, exclusion.type))

return all(matches)

Expand Down
16 changes: 9 additions & 7 deletions tests/test_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
example_package_properties,
example_resource_properties,
)
from check_datapackage.exclude import Exclude
from check_datapackage.exclusion import Exclusion
from tests.test_custom_check import lowercase_check

# Without recommendations
Expand Down Expand Up @@ -142,24 +142,26 @@ def test_fails_properties_violating_recommendations():
}


def test_exclude_not_excluding_custom_check():
def test_exclusion_does_not_exclude_custom_check():
"""Exclusion should not exclude custom check if types do not match."""
properties = example_package_properties()
properties["name"] = "ALLCAPS"
del properties["resources"]
exclude_required = Exclude(type="required")
config = Config(custom_checks=[lowercase_check], exclude=[exclude_required])
exclusion_required = Exclusion(type="required")
config = Config(custom_checks=[lowercase_check], exclusions=[exclusion_required])

issues = check(properties, config=config)

assert len(issues) == 1
assert issues[0].type == "lowercase"


def test_exclude_excluding_custom_check():
def test_exclusion_does_exclude_custom_check():
"""Exclusion should exclude custom check if types match."""
properties = example_package_properties()
properties["name"] = "ALLCAPS"
exclude_lowercase = Exclude(type=lowercase_check.type)
config = Config(custom_checks=[lowercase_check], exclude=[exclude_lowercase])
exclusion_lowercase = Exclusion(type=lowercase_check.type)
config = Config(custom_checks=[lowercase_check], exclusions=[exclusion_lowercase])

issues = check(properties, config=config)

Expand Down
Loading