From 1d4f805ba57cb3d1e9cf8e67c098d5f2a6fa700c Mon Sep 17 00:00:00 2001 From: Yuri Zmytrakov Date: Mon, 22 Sep 2025 10:09:40 +0200 Subject: [PATCH 1/2] feat: use_datetime env var to modify search - Added USE_DATETIME env var to control datetime filtering - USE_DATETIME=True (default): use existing logic that handles both datetime and start/end datetime fields - USE_DATETIME=False: use only start/end datetime fields for search - Added USE_DATETIME env var tests --- .../elasticsearch/database_logic.py | 137 ++++++++++++------ .../stac_fastapi/opensearch/database_logic.py | 137 ++++++++++++------ stac_fastapi/tests/api/test_api.py | 86 +++++++++++ 3 files changed, 272 insertions(+), 88 deletions(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index e852ed774..85cf846a7 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -17,7 +17,7 @@ from stac_fastapi.core.base_database_logic import BaseDatabaseLogic from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer -from stac_fastapi.core.utilities import bbox2polygon, get_max_limit +from stac_fastapi.core.utilities import bbox2polygon, get_bool_env, get_max_limit from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings from stac_fastapi.elasticsearch.config import ( ElasticsearchSettings as SyncElasticsearchSettings, ) @@ -289,26 +289,99 @@ def apply_datetime_filter( Returns: The filtered search object. 
""" + # USE_DATETIME env var + # True: Search by datetime, if null search by start/end datetime + # False: Always search only by start/end datetime + USE_DATETIME = get_bool_env("USE_DATETIME", default=True) + datetime_search = return_date(datetime) if not datetime_search: return search, datetime_search - if "eq" in datetime_search: - # For exact matches, include: - # 1. Items with matching exact datetime - # 2. Items with datetime:null where the time falls within their range - should = [ - Q( - "bool", - filter=[ - Q("exists", field="properties.datetime"), - Q("term", **{"properties__datetime": datetime_search["eq"]}), - ], - ), - Q( + if USE_DATETIME: + if "eq" in datetime_search: + # For exact matches, include: + # 1. Items with matching exact datetime + # 2. Items with datetime:null where the time falls within their range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q( + "term", + **{"properties__datetime": datetime_search["eq"]}, + ), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={ + "lte": datetime_search["eq"] + }, + ), + Q( + "range", + properties__end_datetime={"gte": datetime_search["eq"]}, + ), + ], + ), + ] + else: + # For date ranges, include: + # 1. Items with datetime in the range + # 2. 
Items with datetime:null that overlap the search range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q( + "range", + properties__datetime={ + "gte": datetime_search["gte"], + "lte": datetime_search["lte"], + }, + ), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={ + "lte": datetime_search["lte"] + }, + ), + Q( + "range", + properties__end_datetime={ + "gte": datetime_search["gte"] + }, + ), + ], + ), + ] + + return ( + search.query(Q("bool", should=should, minimum_should_match=1)), + datetime_search, + ) + else: + if "eq" in datetime_search: + filter_query = Q( "bool", - must_not=[Q("exists", field="properties.datetime")], filter=[ Q("exists", field="properties.start_datetime"), Q("exists", field="properties.end_datetime"), @@ -321,29 +394,10 @@ def apply_datetime_filter( properties__end_datetime={"gte": datetime_search["eq"]}, ), ], - ), - ] - else: - # For date ranges, include: - # 1. Items with datetime in the range - # 2. 
Items with datetime:null that overlap the search range - should = [ - Q( - "bool", - filter=[ - Q("exists", field="properties.datetime"), - Q( - "range", - properties__datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], - }, - ), - ], - ), - Q( + ) + else: + filter_query = Q( "bool", - must_not=[Q("exists", field="properties.datetime")], filter=[ Q("exists", field="properties.start_datetime"), Q("exists", field="properties.end_datetime"), @@ -356,13 +410,8 @@ def apply_datetime_filter( properties__end_datetime={"gte": datetime_search["gte"]}, ), ], - ), - ] - - return ( - search.query(Q("bool", should=should, minimum_should_match=1)), - datetime_search, - ) + ) + return search.query(filter_query), datetime_search @staticmethod def apply_bbox_filter(search: Search, bbox: List): diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index e54397bab..f320ed511 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -16,7 +16,7 @@ from stac_fastapi.core.base_database_logic import BaseDatabaseLogic from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer -from stac_fastapi.core.utilities import bbox2polygon, get_max_limit +from stac_fastapi.core.utilities import bbox2polygon, get_bool_env, get_max_limit from stac_fastapi.extensions.core.transaction.request import ( PartialCollection, PartialItem, @@ -301,21 +301,94 @@ def apply_datetime_filter( if not datetime_search: return search, datetime_search - if "eq" in datetime_search: - # For exact matches, include: - # 1. Items with matching exact datetime - # 2. 
Items with datetime:null where the time falls within their range - should = [ - Q( - "bool", - filter=[ - Q("exists", field="properties.datetime"), - Q("term", **{"properties__datetime": datetime_search["eq"]}), - ], - ), - Q( + # USE_DATETIME env var + # True: Search by datetime, if null search by start/end datetime + # False: Always search only by start/end datetime + USE_DATETIME = get_bool_env("USE_DATETIME", default=True) + + if USE_DATETIME: + if "eq" in datetime_search: + # For exact matches, include: + # 1. Items with matching exact datetime + # 2. Items with datetime:null where the time falls within their range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q( + "term", + **{"properties__datetime": datetime_search["eq"]}, + ), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={ + "lte": datetime_search["eq"] + }, + ), + Q( + "range", + properties__end_datetime={"gte": datetime_search["eq"]}, + ), + ], + ), + ] + else: + # For date ranges, include: + # 1. Items with datetime in the range + # 2. 
Items with datetime:null that overlap the search range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q( + "range", + properties__datetime={ + "gte": datetime_search["gte"], + "lte": datetime_search["lte"], + }, + ), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={ + "lte": datetime_search["lte"] + }, + ), + Q( + "range", + properties__end_datetime={ + "gte": datetime_search["gte"] + }, + ), + ], + ), + ] + + return ( + search.query(Q("bool", should=should, minimum_should_match=1)), + datetime_search, + ) + else: + if "eq" in datetime_search: + filter_query = Q( "bool", - must_not=[Q("exists", field="properties.datetime")], filter=[ Q("exists", field="properties.start_datetime"), Q("exists", field="properties.end_datetime"), @@ -328,29 +401,10 @@ def apply_datetime_filter( properties__end_datetime={"gte": datetime_search["eq"]}, ), ], - ), - ] - else: - # For date ranges, include: - # 1. Items with datetime in the range - # 2. 
Items with datetime:null that overlap the search range - should = [ - Q( - "bool", - filter=[ - Q("exists", field="properties.datetime"), - Q( - "range", - properties__datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], - }, - ), - ], - ), - Q( + ) + else: + filter_query = Q( "bool", - must_not=[Q("exists", field="properties.datetime")], filter=[ Q("exists", field="properties.start_datetime"), Q("exists", field="properties.end_datetime"), @@ -363,13 +417,8 @@ def apply_datetime_filter( properties__end_datetime={"gte": datetime_search["gte"]}, ), ], - ), - ] - - return ( - search.query(Q("bool", should=should, minimum_should_match=1)), - datetime_search, - ) + ) + return search.query(filter_query), datetime_search @staticmethod def apply_bbox_filter(search: Search, bbox: List): diff --git a/stac_fastapi/tests/api/test_api.py b/stac_fastapi/tests/api/test_api.py index cdf383f96..e74ab5600 100644 --- a/stac_fastapi/tests/api/test_api.py +++ b/stac_fastapi/tests/api/test_api.py @@ -1537,3 +1537,89 @@ async def test_search_max_item_limit( assert resp.status_code == 200 resp_json = resp.json() assert int(limit) == len(resp_json["features"]) + + +@pytest.mark.asyncio +async def test_use_datetime_true(app_client, load_test_data, txn_client, monkeypatch): + monkeypatch.setenv("USE_DATETIME", "true") + + test_collection = load_test_data("test_collection.json") + test_collection["id"] = "test-collection-datetime-true" + await create_collection(txn_client, test_collection) + + item = load_test_data("test_item.json") + + item1 = item.copy() + item1["id"] = "test-item-datetime" + item1["collection"] = test_collection["id"] + item1["properties"]["datetime"] = "2020-01-01T12:00:00Z" + await create_item(txn_client, item1) + + item2 = item.copy() + item2["id"] = "test-item-start-end" + item2["collection"] = test_collection["id"] + item2["properties"]["datetime"] = None + item2["properties"]["start_datetime"] = "2020-01-01T10:00:00Z" + 
item2["properties"]["end_datetime"] = "2020-01-01T13:00:00Z" + await create_item(txn_client, item2) + + resp = await app_client.post( + "/search", + json={ + "datetime": "2020-01-01T12:00:00Z", + "collections": [test_collection["id"]], + }, + ) + + assert resp.status_code == 200 + resp_json = resp.json() + + found_ids = {feature["id"] for feature in resp_json["features"]} + assert "test-item-datetime" in found_ids + assert "test-item-start-end" in found_ids + + +@pytest.mark.asyncio +async def test_use_datetime_false(app_client, load_test_data, txn_client, monkeypatch): + monkeypatch.setenv("USE_DATETIME", "false") + + test_collection = load_test_data("test_collection.json") + test_collection["id"] = "test-collection-datetime-false" + await create_collection(txn_client, test_collection) + + item = load_test_data("test_item.json") + + # Item 1: Should NOT be found + item1 = item.copy() + item1["id"] = "test-item-datetime-only" + item1["collection"] = test_collection["id"] + item1["properties"]["datetime"] = "2020-01-01T12:00:00Z" + item1["properties"]["start_datetime"] = "2021-01-01T10:00:00Z" + item1["properties"]["end_datetime"] = "2021-01-01T14:00:00Z" + await create_item(txn_client, item1) + + # Item 2: Should be found + item2 = item.copy() + item2["id"] = "test-item-start-end-only" + item2["collection"] = test_collection["id"] + item2["properties"]["datetime"] = None + item2["properties"]["start_datetime"] = "2020-01-01T10:00:00Z" + item2["properties"]["end_datetime"] = "2020-01-01T14:00:00Z" + await create_item(txn_client, item2) + + resp = await app_client.post( + "/search", + json={ + "datetime": "2020-01-01T12:00:00Z", + "collections": [test_collection["id"]], + "limit": 10, + }, + ) + + assert resp.status_code == 200 + resp_json = resp.json() + + found_ids = {feature["id"] for feature in resp_json["features"]} + + assert "test-item-datetime-only" not in found_ids + assert "test-item-start-end-only" in found_ids From cb7bedfb4879ac2a04bd927328e56fc8206658d3 
Mon Sep 17 00:00:00 2001 From: Yuri Zmytrakov Date: Tue, 23 Sep 2025 16:39:13 +0200 Subject: [PATCH 2/2] docs: USE_DATETIME env var description --- CHANGELOG.md | 4 ++++ README.md | 1 + 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51d372bba..c6115b971 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Added + +- Added `USE_DATETIME` environment variable to configure datetime search behavior in SFEOS. [#452](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/452) + ### Changed - unified the type of queryables endpoint to `application/schema+json`. [#445](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/445) diff --git a/README.md b/README.md index dd1734a0f..578a440a4 100644 --- a/README.md +++ b/README.md @@ -247,6 +247,7 @@ You can customize additional settings in your `.env` file: | `STAC_ITEM_LIMIT` | Sets the environment variable for result limiting to SFEOS for the number of returned items and STAC collections. | `10` | Optional | | `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional | | `ENV_MAX_LIMIT` | Configures the environment variable in SFEOS to override the default `MAX_LIMIT`, which controls the limit parameter for returned items and STAC collections. | `10,000` | Optional | +| `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. 
| `true` | Optional | > [!NOTE] > The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.