diff --git a/CHANGELOG.md b/CHANGELOG.md index e26ad22d1..e79dca0ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Added `USE_DATETIME` environment variable to configure datetime search behavior in SFEOS. [#452](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/452) - GET `/collections` collection search sort extension ex. `/collections?sortby=+id`. [#456](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/456) ### Changed diff --git a/README.md b/README.md index dd1734a0f..578a440a4 100644 --- a/README.md +++ b/README.md @@ -247,6 +247,7 @@ You can customize additional settings in your `.env` file: | `STAC_ITEM_LIMIT` | Sets the environment variable for result limiting to SFEOS for the number of returned items and STAC collections. | `10` | Optional | | `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional | | `ENV_MAX_LIMIT` | Configures the environment variable in SFEOS to override the default `MAX_LIMIT`, which controls the limit parameter for returned items and STAC collections. | `10,000` | Optional | +| `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches by the `datetime` field and falls back to the `start_datetime`/`end_datetime` range for items with a null `datetime`. When disabled, searches only by the `start_datetime`/`end_datetime` range. | `true` | Optional | > [!NOTE] > The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch. 
diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index e61765ef7..df1e816db 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -17,7 +17,7 @@ from stac_fastapi.core.base_database_logic import BaseDatabaseLogic from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer -from stac_fastapi.core.utilities import bbox2polygon, get_max_limit +from stac_fastapi.core.utilities import bbox2polygon, get_bool_env, get_max_limit from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings from stac_fastapi.elasticsearch.config import ( ElasticsearchSettings as SyncElasticsearchSettings, @@ -310,26 +310,99 @@ def apply_datetime_filter( Returns: The filtered search object. """ + # USE_DATETIME env var + # True: Search by datetime, if null search by start/end datetime + # False: Always search only by start/end datetime + USE_DATETIME = get_bool_env("USE_DATETIME", default=True) + datetime_search = return_date(datetime) if not datetime_search: return search, datetime_search - if "eq" in datetime_search: - # For exact matches, include: - # 1. Items with matching exact datetime - # 2. Items with datetime:null where the time falls within their range - should = [ - Q( - "bool", - filter=[ - Q("exists", field="properties.datetime"), - Q("term", **{"properties__datetime": datetime_search["eq"]}), - ], - ), - Q( + if USE_DATETIME: + if "eq" in datetime_search: + # For exact matches, include: + # 1. Items with matching exact datetime + # 2. 
Items with datetime:null where the time falls within their range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q( + "term", + **{"properties__datetime": datetime_search["eq"]}, + ), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={ + "lte": datetime_search["eq"] + }, + ), + Q( + "range", + properties__end_datetime={"gte": datetime_search["eq"]}, + ), + ], + ), + ] + else: + # For date ranges, include: + # 1. Items with datetime in the range + # 2. Items with datetime:null that overlap the search range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q( + "range", + properties__datetime={ + "gte": datetime_search["gte"], + "lte": datetime_search["lte"], + }, + ), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={ + "lte": datetime_search["lte"] + }, + ), + Q( + "range", + properties__end_datetime={ + "gte": datetime_search["gte"] + }, + ), + ], + ), + ] + + return ( + search.query(Q("bool", should=should, minimum_should_match=1)), + datetime_search, + ) + else: + if "eq" in datetime_search: + filter_query = Q( "bool", - must_not=[Q("exists", field="properties.datetime")], filter=[ Q("exists", field="properties.start_datetime"), Q("exists", field="properties.end_datetime"), @@ -342,29 +415,10 @@ def apply_datetime_filter( properties__end_datetime={"gte": datetime_search["eq"]}, ), ], - ), - ] - else: - # For date ranges, include: - # 1. Items with datetime in the range - # 2. 
Items with datetime:null that overlap the search range - should = [ - Q( - "bool", - filter=[ - Q("exists", field="properties.datetime"), - Q( - "range", - properties__datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], - }, - ), - ], - ), - Q( + ) + else: + filter_query = Q( "bool", - must_not=[Q("exists", field="properties.datetime")], filter=[ Q("exists", field="properties.start_datetime"), Q("exists", field="properties.end_datetime"), @@ -377,13 +431,8 @@ def apply_datetime_filter( properties__end_datetime={"gte": datetime_search["gte"]}, ), ], - ), - ] - - return ( - search.query(Q("bool", should=should, minimum_should_match=1)), - datetime_search, - ) + ) + return search.query(filter_query), datetime_search @staticmethod def apply_bbox_filter(search: Search, bbox: List): diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 558684df6..4253a00a7 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -17,7 +17,7 @@ from stac_fastapi.core.base_database_logic import BaseDatabaseLogic from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer -from stac_fastapi.core.utilities import bbox2polygon, get_max_limit +from stac_fastapi.core.utilities import bbox2polygon, get_bool_env, get_max_limit from stac_fastapi.extensions.core.transaction.request import ( PartialCollection, PartialItem, @@ -318,21 +318,94 @@ def apply_datetime_filter( if not datetime_search: return search, datetime_search - if "eq" in datetime_search: - # For exact matches, include: - # 1. Items with matching exact datetime - # 2. 
Items with datetime:null where the time falls within their range - should = [ - Q( - "bool", - filter=[ - Q("exists", field="properties.datetime"), - Q("term", **{"properties__datetime": datetime_search["eq"]}), - ], - ), - Q( + # USE_DATETIME env var + # True: Search by datetime, if null search by start/end datetime + # False: Always search only by start/end datetime + USE_DATETIME = get_bool_env("USE_DATETIME", default=True) + + if USE_DATETIME: + if "eq" in datetime_search: + # For exact matches, include: + # 1. Items with matching exact datetime + # 2. Items with datetime:null where the time falls within their range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q( + "term", + **{"properties__datetime": datetime_search["eq"]}, + ), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={ + "lte": datetime_search["eq"] + }, + ), + Q( + "range", + properties__end_datetime={"gte": datetime_search["eq"]}, + ), + ], + ), + ] + else: + # For date ranges, include: + # 1. Items with datetime in the range + # 2. 
Items with datetime:null that overlap the search range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q( + "range", + properties__datetime={ + "gte": datetime_search["gte"], + "lte": datetime_search["lte"], + }, + ), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={ + "lte": datetime_search["lte"] + }, + ), + Q( + "range", + properties__end_datetime={ + "gte": datetime_search["gte"] + }, + ), + ], + ), + ] + + return ( + search.query(Q("bool", should=should, minimum_should_match=1)), + datetime_search, + ) + else: + if "eq" in datetime_search: + filter_query = Q( "bool", - must_not=[Q("exists", field="properties.datetime")], filter=[ Q("exists", field="properties.start_datetime"), Q("exists", field="properties.end_datetime"), @@ -345,29 +418,10 @@ def apply_datetime_filter( properties__end_datetime={"gte": datetime_search["eq"]}, ), ], - ), - ] - else: - # For date ranges, include: - # 1. Items with datetime in the range - # 2. 
Items with datetime:null that overlap the search range - should = [ - Q( - "bool", - filter=[ - Q("exists", field="properties.datetime"), - Q( - "range", - properties__datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], - }, - ), - ], - ), - Q( + ) + else: + filter_query = Q( "bool", - must_not=[Q("exists", field="properties.datetime")], filter=[ Q("exists", field="properties.start_datetime"), Q("exists", field="properties.end_datetime"), @@ -380,13 +434,8 @@ def apply_datetime_filter( properties__end_datetime={"gte": datetime_search["gte"]}, ), ], - ), - ] - - return ( - search.query(Q("bool", should=should, minimum_should_match=1)), - datetime_search, - ) + ) + return search.query(filter_query), datetime_search @staticmethod def apply_bbox_filter(search: Search, bbox: List): diff --git a/stac_fastapi/tests/api/test_api.py b/stac_fastapi/tests/api/test_api.py index cdf383f96..e74ab5600 100644 --- a/stac_fastapi/tests/api/test_api.py +++ b/stac_fastapi/tests/api/test_api.py @@ -1537,3 +1537,89 @@ async def test_search_max_item_limit( assert resp.status_code == 200 resp_json = resp.json() assert int(limit) == len(resp_json["features"]) + + +@pytest.mark.asyncio +async def test_use_datetime_true(app_client, load_test_data, txn_client, monkeypatch): + monkeypatch.setenv("USE_DATETIME", "true") + + test_collection = load_test_data("test_collection.json") + test_collection["id"] = "test-collection-datetime-true" + await create_collection(txn_client, test_collection) + + item = load_test_data("test_item.json") + + item1 = item.copy() + item1["id"] = "test-item-datetime" + item1["collection"] = test_collection["id"] + item1["properties"]["datetime"] = "2020-01-01T12:00:00Z" + await create_item(txn_client, item1) + + item2 = item.copy() + item2["id"] = "test-item-start-end" + item2["collection"] = test_collection["id"] + item2["properties"]["datetime"] = None + item2["properties"]["start_datetime"] = "2020-01-01T10:00:00Z" + 
item2["properties"]["end_datetime"] = "2020-01-01T13:00:00Z" + await create_item(txn_client, item2) + + resp = await app_client.post( + "/search", + json={ + "datetime": "2020-01-01T12:00:00Z", + "collections": [test_collection["id"]], + }, + ) + + assert resp.status_code == 200 + resp_json = resp.json() + + found_ids = {feature["id"] for feature in resp_json["features"]} + assert "test-item-datetime" in found_ids + assert "test-item-start-end" in found_ids + + +@pytest.mark.asyncio +async def test_use_datetime_false(app_client, load_test_data, txn_client, monkeypatch): + monkeypatch.setenv("USE_DATETIME", "false") + + test_collection = load_test_data("test_collection.json") + test_collection["id"] = "test-collection-datetime-false" + await create_collection(txn_client, test_collection) + + item = load_test_data("test_item.json") + + # Item 1: Should NOT be found + item1 = item.copy() + item1["id"] = "test-item-datetime-only" + item1["collection"] = test_collection["id"] + item1["properties"]["datetime"] = "2020-01-01T12:00:00Z" + item1["properties"]["start_datetime"] = "2021-01-01T10:00:00Z" + item1["properties"]["end_datetime"] = "2021-01-01T14:00:00Z" + await create_item(txn_client, item1) + + # Item 2: Should be found + item2 = item.copy() + item2["id"] = "test-item-start-end-only" + item2["collection"] = test_collection["id"] + item2["properties"]["datetime"] = None + item2["properties"]["start_datetime"] = "2020-01-01T10:00:00Z" + item2["properties"]["end_datetime"] = "2020-01-01T14:00:00Z" + await create_item(txn_client, item2) + + resp = await app_client.post( + "/search", + json={ + "datetime": "2020-01-01T12:00:00Z", + "collections": [test_collection["id"]], + "limit": 10, + }, + ) + + assert resp.status_code == 200 + resp_json = resp.json() + + found_ids = {feature["id"] for feature in resp_json["features"]} + + assert "test-item-datetime-only" not in found_ids + assert "test-item-start-end-only" in found_ids