Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion justfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ test-python:
-i coverage.xml \
-o htmlcov/coverage.svg

# Check Python code with the linter for any errors that need manual attention
# Check Python code for any errors that need manual attention
check-python:
# Check for linting errors
uv run ruff check .
# Check types
uv run mypy .

# Reformat Python code to match coding style and general structure
format-python:
Expand Down
5 changes: 5 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[mypy]
python_version = 3.12

[mypy-quartodoc.*]
ignore_missing_imports = True
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,17 @@ dev = [
"datamodel-code-generator>=0.28.5",
"genbadge>=1.1.2",
"jupyter>=1.1.1",
"mypy>=1.16.1",
"pre-commit>=4.2.0",
"pytest>=8.3.5",
"pytest-cov>=6.1.1",
"pyyaml>=6.0.2",
"quartodoc>=0.9.1",
"ruff>=0.11.4",
"time-machine>=2.16.0",
"types-jsonschema>=4.24.0.20250528",
"types-requests>=2.32.4.20250611",
"types-tabulate>=0.9.0.20241207",
"typos>=1.31.1",
"vulture>=2.14",
]
Expand Down
2 changes: 1 addition & 1 deletion src/seedcase_sprout/as_readme_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def join_names(licenses: list[LicenseProperties] | None) -> str:
Returns:
A comma-separated list of names.
"""
return ", ".join(license.name for license in licenses) if licenses else "N/A"
return ", ".join(str(license.name) for license in licenses) if licenses else "N/A"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a very common situation: we know that this property is not `None` (because we've already checked the properties), but mypy has no way of inferring this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could consider whether, now that we have the new script-based approach, it would make sense for us to use Pydantic to help with object typing.



def format_date(created: str | None) -> str:
Expand Down
23 changes: 14 additions & 9 deletions src/seedcase_sprout/check_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import cast

import polars as pl

from seedcase_sprout.check_properties import (
Expand Down Expand Up @@ -70,7 +72,7 @@ def check_data(

def _check_column_names(
data: pl.DataFrame, resource_properties: ResourceProperties
) -> str:
) -> pl.DataFrame:
"""Checks that column names in `data` match those in `resource_properties`.

Columns may appear in any order.
Expand All @@ -89,7 +91,9 @@ def _check_column_names(
columns_in_data = data.schema.names()
columns_in_resource = [
field.name
for field in get_nested_attr(resource_properties, "schema.fields", default=[])
for field in cast(
list, get_nested_attr(resource_properties, "schema.fields", default=[])
)
]
extra_columns_in_data = [
name for name in columns_in_data if name not in columns_in_resource
Expand Down Expand Up @@ -142,15 +146,16 @@ def _check_column_types(
Raises:
ExceptionGroup: A group of `ValueError`s, one per incorrectly typed column.
"""
fields: list[FieldProperties] = get_nested_attr(
resource_properties, "schema.fields", default=[]
fields = cast(
list[FieldProperties],
get_nested_attr(resource_properties, "schema.fields", default=[]),
)
polars_schema = data.schema
errors = [
_get_column_type_error(polars_schema[field.name], field)
_get_column_type_error(polars_schema[str(field.name)], field)
for field in fields
if not _polars_and_datapackage_types_match(
polars_schema[field.name], field.type
polars_schema[str(field.name)], field.type
)
]

Expand Down Expand Up @@ -178,16 +183,16 @@ def _get_column_type_error(
A `ValueError`.
"""
allowed_types = _map(_get_allowed_polars_types(field.type), str)
allowed_types = (
allowed_types_str = (
allowed_types[0]
if len(allowed_types) == 1
else f"one of {', '.join(allowed_types)}"
)

if field.type == "geopoint":
allowed_types = "an Array of a numeric type with size 2"
allowed_types_str = "an Array of a numeric type with size 2"

return ValueError(
f"Expected type of column '{field.name}' "
f"to be {allowed_types} but found {polars_type}."
f"to be {allowed_types_str} but found {polars_type}."
)
10 changes: 7 additions & 3 deletions src/seedcase_sprout/check_datapackage/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@ class RequiredFieldType(str, Enum):

COMPLEX_VALIDATORS = {"allOf", "anyOf", "oneOf"}

DATA_PACKAGE_SCHEMA_PATH: Path = files(
"seedcase_sprout.check_datapackage.schemas"
).joinpath("data-package-schema.json")
DATA_PACKAGE_SCHEMA_PATH = Path(
str(
files("seedcase_sprout.check_datapackage.schemas").joinpath(
"data-package-schema.json"
)
)
)

NAME_PATTERN = r"^[a-z0-9._-]+$"

Expand Down
4 changes: 2 additions & 2 deletions src/seedcase_sprout/check_datapackage/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)


def _read_json(path: Path) -> list | dict:
def _read_json(path: Path) -> dict:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Strictly speaking, `list | dict` is more correct, but in practice we only work with dicts, and narrowing the return type simplifies things.

"""Reads the contents of a JSON file into an object."""
return loads(path.read_text())

Expand Down Expand Up @@ -100,7 +100,7 @@ def _validation_errors_to_check_errors(
CheckError(
message=error.message,
json_path=_get_full_json_path_from_error(error),
validator=error.validator,
validator=str(error.validator),
)
for error in _unwrap_errors(list(validation_errors))
if error.validator not in COMPLEX_VALIDATORS
Expand Down
Empty file.
9 changes: 6 additions & 3 deletions src/seedcase_sprout/check_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def check_resource_properties(properties: ResourceProperties) -> ResourcePropert
# TODO: This probably is better placed in the `check-datapackage` package.
except ExceptionGroup as error_info:
for error in error_info.exceptions:
error.json_path = error.json_path.replace(".resources[0]", "")
if isinstance(error, cdp.CheckError):
error.json_path = error.json_path.replace(".resources[0]", "")
raise error_info

return properties
Expand Down Expand Up @@ -141,8 +142,10 @@ def _generic_check_properties(
errors = cdp.check_properties(properties_dict)

errors += get_sprout_package_errors(properties_dict)
if isinstance(properties_dict.get("resources"), list):
for index, resource in enumerate(properties_dict.get("resources")):

resources = properties_dict.get("resources")
if isinstance(resources, list):
for index, resource in enumerate(resources):
if isinstance(resource, dict):
errors += get_sprout_resource_errors(resource, index)

Expand Down
2 changes: 1 addition & 1 deletion src/seedcase_sprout/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
"""The name of the timestamp column added to the batch data (only used internally)."""
BATCH_TIMESTAMP_COLUMN_NAME = "_batch_file_timestamp_"

TEMPLATES_PATH: Path = files("seedcase_sprout").joinpath("templates")
TEMPLATES_PATH = Path(str(files("seedcase_sprout").joinpath("templates")))
4 changes: 2 additions & 2 deletions src/seedcase_sprout/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ class ExamplePackage(AbstractContextManager):

def __init__(
self,
package_name: str = example_package_properties().name,
package_name: str = str(example_package_properties().name),
with_resources: bool = True,
):
"""Initialise the `ExamplePackage` context manager.
Expand Down Expand Up @@ -375,7 +375,7 @@ def __enter__(self) -> PackagePath:
package_properties.resources = [resource_properties]

# Create resource folders
package_path.resource(resource_properties.name).mkdir(
package_path.resource(str(resource_properties.name)).mkdir(
exist_ok=True, parents=True
)

Expand Down
6 changes: 3 additions & 3 deletions src/seedcase_sprout/get_nested_attr.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ class Outer:
get_nested_attr(Outer(), "middle.inner")
```
"""
attributes = attributes.split(".")
if any(not attribute.isidentifier() for attribute in attributes):
attributes_list = attributes.split(".")
if any(not attribute.isidentifier() for attribute in attributes_list):
raise ValueError(
"`attributes` should contain valid identifiers separated by `.`."
)

try:
for attribute in attributes:
for attribute in attributes_list:
base_object = getattr(base_object, attribute)
except AttributeError:
return default
Expand Down
2 changes: 1 addition & 1 deletion src/seedcase_sprout/internals/functionals.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,5 @@ def add(a, b):
```
"""
if len(y) == 1:
y = repeat(y[0], len(x))
y = list(repeat(y[0], len(x)))
return list(map(fn, x, y))
2 changes: 1 addition & 1 deletion src/seedcase_sprout/internals/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path


def _read_json(path: Path) -> list | dict:
def _read_json(path: Path) -> dict:
"""Reads the contents of a JSON file into an object.

Args:
Expand Down
3 changes: 2 additions & 1 deletion src/seedcase_sprout/join_resource_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
check_resource_properties,
)
from seedcase_sprout.constants import BATCH_TIMESTAMP_COLUMN_NAME
from seedcase_sprout.get_nested_attr import get_nested_attr
from seedcase_sprout.properties import ResourceProperties


Expand Down Expand Up @@ -69,7 +70,7 @@ def join_resource_batches(
)

data = pl.concat(data_list)
primary_key = resource_properties.schema.primary_key
primary_key = get_nested_attr(resource_properties, "schema.primary_key")
data = _drop_duplicate_obs_units(data, primary_key)

check_data(data, resource_properties)
Expand Down
5 changes: 4 additions & 1 deletion src/seedcase_sprout/map_data_types.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import cast

import polars as pl

from seedcase_sprout.properties import FieldType
Expand Down Expand Up @@ -49,7 +51,8 @@ def _get_allowed_datapackage_types(polars_type: pl.DataType) -> list[FieldType]:
Returns:
The allowed Data Package types.
"""
allowed_types = _POLARS_TO_DATAPACKAGE.get(polars_type.base_type(), ["any"])
base_type = cast(type[pl.DataType], polars_type.base_type())
allowed_types = _POLARS_TO_DATAPACKAGE.get(base_type, ["any"])

if (
isinstance(polars_type, pl.Array)
Expand Down
1 change: 1 addition & 0 deletions src/seedcase_sprout/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
)


@dataclass
class Properties(ABC):
"""An abstract base class for all `*Properties` classes holding common logic."""

Expand Down
Empty file added src/seedcase_sprout/py.typed
Empty file.
4 changes: 2 additions & 2 deletions src/seedcase_sprout/read_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def read_properties(path: Path | None = None) -> PackageProperties:
"""
path = path or PackagePath().properties()
_check_is_file(path)
properties = _read_json(path)
properties = PackageProperties.from_dict(properties)
properties_dict = _read_json(path)
properties = PackageProperties.from_dict(properties_dict)
check_properties(properties)
return properties
2 changes: 1 addition & 1 deletion src/seedcase_sprout/read_resource_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def read_resource_batches(
"""
check_resource_properties(resource_properties)
if paths is None:
paths = PackagePath().resource_batch_files(resource_properties.name)
paths = PackagePath().resource_batch_files(str(resource_properties.name))

_map(paths, _check_is_file)
return _map2(paths, [resource_properties], _read_parquet_batch_file)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def _check_resource_path_format(
):
return []

expected_path = _create_resource_data_path(name)
expected_path = _create_resource_data_path(str(name))
if path == expected_path:
return []

Expand Down
2 changes: 1 addition & 1 deletion src/seedcase_sprout/write_resource_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def write_resource_batch(
check_resource_properties(resource_properties)
check_data(data, resource_properties)

batch_path = PackagePath(package_path).resource_batch(resource_properties.name)
batch_path = PackagePath(package_path).resource_batch(str(resource_properties.name))
batch_path.mkdir(exist_ok=True, parents=True)
# TODO: Move out some of this into the create_batch_file_name during refactoring
batch_file_path = batch_path / _create_batch_file_name()
Expand Down
2 changes: 1 addition & 1 deletion src/seedcase_sprout/write_resource_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def write_resource_data(
sp.write_resource_data(data, resource_properties)
"""
check_data(data, resource_properties)
data_path = PackagePath(package_path).resource_data(resource_properties.name)
data_path = PackagePath(package_path).resource_data(str(resource_properties.name))

data.write_parquet(data_path)
return data_path
4 changes: 2 additions & 2 deletions tests/assert_raises_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


def assert_raises_errors(
fn: Callable, error_type: type[BaseException], error_count: int = None
fn: Callable, error_type: type[BaseException], error_count: int | None = None
) -> None:
"""Asserts that the function raises a group of errors of the given type."""
with raises(ExceptionGroup) as error_info:
Expand All @@ -18,6 +18,6 @@ def assert_raises_errors(
assert len(errors) == error_count


def assert_raises_check_errors(fn: Callable, error_count: int = None) -> None:
def assert_raises_check_errors(fn: Callable, error_count: int | None = None) -> None:
"""Asserts that the function raises a group of `CheckError`s."""
assert_raises_errors(fn, CheckError, error_count)
2 changes: 2 additions & 0 deletions tests/test_create_properties_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def test_works_with_custom_path(tmp_path):
def load_properties(path: Path) -> PackageProperties:
"""Loads `properties` object from file."""
spec = spec_from_file_location("test_module", path)
assert spec
assert spec.loader
Comment on lines +45 to +46
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought it was okay to use asserts in tests to tell mypy that these are not `None`. If an assert fails, the test fails -- as it should.

module = module_from_spec(spec)
spec.loader.exec_module(module)
return module.properties
Expand Down
3 changes: 2 additions & 1 deletion tests/test_extract_resource_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def _keep_extractable_properties(
example_properties: ResourceProperties,
) -> ResourceProperties:
"""Filter example properties to only keep the extractable properties."""

assert example_properties.schema
assert example_properties.schema.fields
fields = list(
map(
lambda field: FieldProperties(name=field.name, type=field.type),
Expand Down
1 change: 1 addition & 0 deletions tools/vulture-allowlist.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# ruff: noqa
# type: ignore
email # unused variable (src/seedcase_sprout/properties.py:91)
given_name # unused variable (src/seedcase_sprout/properties.py:92)
family_name # unused variable (src/seedcase_sprout/properties.py:93)
Expand Down
Loading