Skip to content

Commit 2f00dbe

Browse files
authored
add q parameter for free-text search (#295)
* add q parameter for free-text search * handle quoted statements, add more tests
1 parent 15f42d8 commit 2f00dbe

File tree

10 files changed

+5406
-46
lines changed

10 files changed

+5406
-46
lines changed

CHANGELOG.md

Lines changed: 91 additions & 41 deletions
Large diffs are not rendered by default.

src/pgstac/migrations/pgstac.0.9.1-unreleased.sql

Lines changed: 477 additions & 0 deletions
Large diffs are not rendered by default.

src/pgstac/migrations/pgstac.unreleased.sql

Lines changed: 4564 additions & 0 deletions
Large diffs are not rendered by default.

src/pgstac/pgstac.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
migrations/pgstac.0.9.1.sql
1+
migrations/pgstac.unreleased.sql

src/pgstac/sql/004_search.sql

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,59 @@ CREATE OR REPLACE FUNCTION partition_query_view(
127127
$$ LANGUAGE SQL IMMUTABLE;
128128

129129

130+
CREATE OR REPLACE FUNCTION q_to_tsquery (input text)
131+
RETURNS tsquery
132+
AS $$
133+
DECLARE
134+
processed_text text;
135+
temp_text text;
136+
quote_array text[];
137+
placeholder text := '@QUOTE@';
138+
BEGIN
139+
-- Extract all quoted phrases and store in array
140+
quote_array := regexp_matches(input, '"[^"]*"', 'g');
141+
142+
-- Replace each quoted part with a unique placeholder if there are any quoted phrases
143+
IF array_length(quote_array, 1) IS NOT NULL THEN
144+
processed_text := input;
145+
FOR i IN array_lower(quote_array, 1) .. array_upper(quote_array, 1) LOOP
146+
processed_text := replace(processed_text, quote_array[i], placeholder || i || placeholder);
147+
END LOOP;
148+
ELSE
149+
processed_text := input;
150+
END IF;
151+
152+
-- Replace non-quoted text using regular expressions
153+
154+
-- , -> |
155+
processed_text := regexp_replace(processed_text, ',(?=(?:[^"]*"[^"]*")*[^"]*$)', ' | ', 'g');
156+
157+
-- and -> &
158+
processed_text := regexp_replace(processed_text, '\s+AND\s+', ' & ', 'gi');
159+
160+
-- or -> |
161+
processed_text := regexp_replace(processed_text, '\s+OR\s+', ' | ', 'gi');
162+
163+
-- +term -> & term
164+
processed_text := regexp_replace(processed_text, '\+([a-zA-Z0-9_]+)', '& \1', 'g');
165+
166+
-- -term -> ! term
167+
processed_text := regexp_replace(processed_text, '\-([a-zA-Z0-9_]+)', '& ! \1', 'g');
168+
169+
-- Replace placeholders back with quoted phrases if there were any
170+
IF array_length(quote_array, 1) IS NOT NULL THEN
171+
FOR i IN array_lower(quote_array, 1) .. array_upper(quote_array, 1) LOOP
172+
processed_text := replace(processed_text, placeholder || i || placeholder, '''' || substring(quote_array[i] from 2 for length(quote_array[i]) - 2) || '''');
173+
END LOOP;
174+
END IF;
175+
176+
-- Print processed_text to the console for debugging purposes
177+
RAISE NOTICE 'processed_text: %', processed_text;
178+
179+
RETURN to_tsquery(processed_text);
180+
END;
181+
$$
182+
LANGUAGE plpgsql;
130183

131184

132185
CREATE OR REPLACE FUNCTION stac_search_to_where(j jsonb) RETURNS text AS $$
@@ -140,6 +193,7 @@ DECLARE
140193
edate timestamptz;
141194
filterlang text;
142195
filter jsonb := j->'filter';
196+
ft_query tsquery;
143197
BEGIN
144198
IF j ? 'ids' THEN
145199
where_segments := where_segments || format('id = ANY (%L) ', to_text_array(j->'ids'));
@@ -161,6 +215,20 @@ BEGIN
161215
);
162216
END IF;
163217

218+
IF j ? 'q' THEN
219+
ft_query := q_to_tsquery(j->>'q');
220+
where_segments := where_segments || format(
221+
$quote$
222+
(
223+
to_tsvector('english', content->'properties'->>'description') ||
224+
to_tsvector('english', content->'properties'->>'title') ||
225+
to_tsvector('english', content->'properties'->'keywords')
226+
) @@ %L
227+
$quote$,
228+
ft_query
229+
);
230+
END IF;
231+
164232
geom := stac_geom(j);
165233
IF geom IS NOT NULL THEN
166234
where_segments := where_segments || format('st_intersects(geometry, %L)',geom);

src/pgstac/sql/999_version.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
SELECT set_version('0.9.1');
1+
SELECT set_version('unreleased');
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
2+
SET pgstac.context TO 'on';
3+
SET pgstac."default_filter_lang" TO 'cql2-json';
4+
5+
CREATE TEMP TABLE temp_collections (
6+
id SERIAL PRIMARY KEY,
7+
title TEXT,
8+
description TEXT,
9+
keywords TEXT,
10+
minx NUMERIC,
11+
miny NUMERIC,
12+
maxx NUMERIC,
13+
maxy NUMERIC,
14+
sdt TIMESTAMPTZ,
15+
edt TIMESTAMPTZ
16+
);
17+
18+
INSERT INTO temp_collections (
19+
title, description, keywords, minx, miny, maxx, maxy, sdt, edt
20+
) VALUES
21+
(
22+
'Stranger Things',
23+
'Some teenagers drop out of school to fight monsters',
24+
'monster, scary, dark, 80s',
25+
-180, -90, 180, 90,
26+
'2016-01-01T00:00:00Z',
27+
'2025-12-31T23:59:59Z'
28+
),
29+
(
30+
'The Bear',
31+
'Another story about why you should not start a restaurant',
32+
'restaurant, funny, sad, great',
33+
-180, -90, 180, 90,
34+
'2022-01-01T00:00:00Z',
35+
'2025-12-31T23:59:59Z'
36+
),
37+
(
38+
'Godzilla',
39+
'A large lizard takes its revenge',
40+
'scary, lizard, monster',
41+
-180, -90, 180, 90,
42+
'1954-01-01T00:00:00Z',
43+
null
44+
),
45+
(
46+
'Chefs Table',
47+
'Another great story that make you wonder if you should go to a restaurant',
48+
'restaurant, food, michelin',
49+
-180, -90, 180, 90,
50+
'2019-01-01T00:00:00Z',
51+
'2025-12-31T23:59:59Z'
52+
);
53+
54+
SELECT
55+
create_collection(jsonb_build_object(
56+
'id', format('testcollection_%s', id),
57+
'type', 'Collection',
58+
'title', title,
59+
'description', description,
60+
'extent', jsonb_build_object(
61+
'spatial', jsonb_build_array(jsonb_build_array(minx, miny, maxx, maxy)),
62+
'temporal', jsonb_build_array(jsonb_build_array(sdt, edt))
63+
),
64+
'stac_extensions', jsonb_build_array(),
65+
'keywords', string_to_array(keywords, ', ')
66+
))
67+
FROM temp_collections;
68+
69+
select collection_search('{"q": "monsters"}');
70+
71+
select collection_search('{"q": "lizard"}');
72+
73+
select collection_search('{"q": "scary OR funny"}');
74+
75+
select collection_search('{"q": "(scary AND revenge) OR (funny AND sad)"}');
76+
77+
select collection_search('{"q": "\"great story\""}');
78+
79+
select collection_search('{"q": "monster -school"}');
80+
81+
select collection_search('{"q": "bear or stranger"}');
82+
83+
select collection_search('{"q": "bear OR stranger"}');
84+
85+
select collection_search('{"q": "bear, stranger"}');
86+
87+
select collection_search('{"q": "bear AND stranger"}');
88+
89+
select collection_search('{"q": "bear and stranger"}');
90+
91+
select collection_search('{"q": "\"bear or stranger\""}');
92+
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
SET pgstac.context TO 'on';
2+
SET
3+
SET pgstac."default_filter_lang" TO 'cql2-json';
4+
SET
5+
CREATE TEMP TABLE temp_collections (
6+
id SERIAL PRIMARY KEY,
7+
title TEXT,
8+
description TEXT,
9+
keywords TEXT,
10+
minx NUMERIC,
11+
miny NUMERIC,
12+
maxx NUMERIC,
13+
maxy NUMERIC,
14+
sdt TIMESTAMPTZ,
15+
edt TIMESTAMPTZ
16+
);
17+
CREATE TABLE
18+
INSERT INTO temp_collections (
19+
title, description, keywords, minx, miny, maxx, maxy, sdt, edt
20+
) VALUES
21+
(
22+
'Stranger Things',
23+
'Some teenagers drop out of school to fight monsters',
24+
'monster, scary, dark, 80s',
25+
-180, -90, 180, 90,
26+
'2016-01-01T00:00:00Z',
27+
'2025-12-31T23:59:59Z'
28+
),
29+
(
30+
'The Bear',
31+
'Another story about why you should not start a restaurant',
32+
'restaurant, funny, sad, great',
33+
-180, -90, 180, 90,
34+
'2022-01-01T00:00:00Z',
35+
'2025-12-31T23:59:59Z'
36+
),
37+
(
38+
'Godzilla',
39+
'A large lizard takes its revenge',
40+
'scary, lizard, monster',
41+
-180, -90, 180, 90,
42+
'1954-01-01T00:00:00Z',
43+
null
44+
),
45+
(
46+
'Chefs Table',
47+
'Another great story that make you wonder if you should go to a restaurant',
48+
'restaurant, food, michelin',
49+
-180, -90, 180, 90,
50+
'2019-01-01T00:00:00Z',
51+
'2025-12-31T23:59:59Z'
52+
);
53+
INSERT 0 4
54+
SELECT
55+
create_collection(jsonb_build_object(
56+
'id', format('testcollection_%s', id),
57+
'type', 'Collection',
58+
'title', title,
59+
'description', description,
60+
'extent', jsonb_build_object(
61+
'spatial', jsonb_build_array(jsonb_build_array(minx, miny, maxx, maxy)),
62+
'temporal', jsonb_build_array(jsonb_build_array(sdt, edt))
63+
),
64+
'stac_extensions', jsonb_build_array(),
65+
'keywords', string_to_array(keywords, ', ')
66+
))
67+
FROM temp_collections;
68+
69+
70+
71+
72+
select collection_search('{"q": "monsters"}');
73+
{"links": [], "collections": [{"id": "testcollection_1", "type": "Collection", "title": "Stranger Things", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2016-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["monster", "scary", "dark", "80s"], "description": "Some teenagers drop out of school to fight monsters", "stac_extensions": []}, {"id": "testcollection_3", "type": "Collection", "title": "Godzilla", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["1954-01-01T00:00:00+00:00", null]]}, "keywords": ["scary", "lizard", "monster"], "description": "A large lizard takes its revenge", "stac_extensions": []}], "numberMatched": 2, "numberReturned": 2}
74+
75+
76+
select collection_search('{"q": "lizard"}');
77+
{"links": [], "collections": [{"id": "testcollection_3", "type": "Collection", "title": "Godzilla", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["1954-01-01T00:00:00+00:00", null]]}, "keywords": ["scary", "lizard", "monster"], "description": "A large lizard takes its revenge", "stac_extensions": []}], "numberMatched": 1, "numberReturned": 1}
78+
79+
80+
select collection_search('{"q": "scary OR funny"}');
81+
{"links": [], "collections": [{"id": "testcollection_1", "type": "Collection", "title": "Stranger Things", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2016-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["monster", "scary", "dark", "80s"], "description": "Some teenagers drop out of school to fight monsters", "stac_extensions": []}, {"id": "testcollection_2", "type": "Collection", "title": "The Bear", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2022-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "funny", "sad", "great"], "description": "Another story about why you should not start a restaurant", "stac_extensions": []}, {"id": "testcollection_3", "type": "Collection", "title": "Godzilla", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["1954-01-01T00:00:00+00:00", null]]}, "keywords": ["scary", "lizard", "monster"], "description": "A large lizard takes its revenge", "stac_extensions": []}], "numberMatched": 3, "numberReturned": 3}
82+
83+
select collection_search('{"q": "(scary AND revenge) OR (funny AND sad)"}');
84+
{"links": [], "collections": [{"id": "testcollection_2", "type": "Collection", "title": "The Bear", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2022-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "funny", "sad", "great"], "description": "Another story about why you should not start a restaurant", "stac_extensions": []}, {"id": "testcollection_3", "type": "Collection", "title": "Godzilla", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["1954-01-01T00:00:00+00:00", null]]}, "keywords": ["scary", "lizard", "monster"], "description": "A large lizard takes its revenge", "stac_extensions": []}], "numberMatched": 2, "numberReturned": 2}
85+
86+
select collection_search('{"q": "\"great story\""}');
87+
{"links": [], "collections": [{"id": "testcollection_4", "type": "Collection", "title": "Chefs Table", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2019-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "food", "michelin"], "description": "Another great story that make you wonder if you should go to a restaurant", "stac_extensions": []}], "numberMatched": 1, "numberReturned": 1}
88+
89+
select collection_search('{"q": "monster -school"}');
90+
{"links": [], "collections": [{"id": "testcollection_3", "type": "Collection", "title": "Godzilla", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["1954-01-01T00:00:00+00:00", null]]}, "keywords": ["scary", "lizard", "monster"], "description": "A large lizard takes its revenge", "stac_extensions": []}], "numberMatched": 1, "numberReturned": 1}
91+
92+
select collection_search('{"q": "bear or stranger"}');
93+
{"links": [], "collections": [{"id": "testcollection_1", "type": "Collection", "title": "Stranger Things", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2016-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["monster", "scary", "dark", "80s"], "description": "Some teenagers drop out of school to fight monsters", "stac_extensions": []}, {"id": "testcollection_2", "type": "Collection", "title": "The Bear", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2022-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "funny", "sad", "great"], "description": "Another story about why you should not start a restaurant", "stac_extensions": []}], "numberMatched": 2, "numberReturned": 2}
94+
95+
select collection_search('{"q": "bear OR stranger"}');
96+
{"links": [], "collections": [{"id": "testcollection_1", "type": "Collection", "title": "Stranger Things", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2016-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["monster", "scary", "dark", "80s"], "description": "Some teenagers drop out of school to fight monsters", "stac_extensions": []}, {"id": "testcollection_2", "type": "Collection", "title": "The Bear", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2022-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "funny", "sad", "great"], "description": "Another story about why you should not start a restaurant", "stac_extensions": []}], "numberMatched": 2, "numberReturned": 2}
97+
98+
select collection_search('{"q": "bear, stranger"}');
99+
{"links": [], "collections": [{"id": "testcollection_1", "type": "Collection", "title": "Stranger Things", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2016-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["monster", "scary", "dark", "80s"], "description": "Some teenagers drop out of school to fight monsters", "stac_extensions": []}, {"id": "testcollection_2", "type": "Collection", "title": "The Bear", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2022-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "funny", "sad", "great"], "description": "Another story about why you should not start a restaurant", "stac_extensions": []}], "numberMatched": 2, "numberReturned": 2}
100+
101+
select collection_search('{"q": "bear AND stranger"}');
102+
{"links": [], "collections": [], "numberMatched": 0, "numberReturned": 0}
103+
104+
select collection_search('{"q": "bear and stranger"}');
105+
{"links": [], "collections": [], "numberMatched": 0, "numberReturned": 0}
106+
107+
select collection_search('{"q": "\"bear or stranger\""}');
108+
{"links": [], "collections": [], "numberMatched": 0, "numberReturned": 0}
109+

src/pypgstac/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "pypgstac"
3-
version = "0.9.1"
3+
version = "0.9.1-dev"
44
description = "Schema, functions and a python library for storing and accessing STAC collections and items in PostgreSQL"
55
readme = "README.md"
66
requires-python = ">=3.8"
@@ -46,7 +46,7 @@ dev = [
4646
"black>=21.7b0",
4747
"mypy>=0.910",
4848
"types-orjson==0.1.1",
49-
"types-pkg-resources",
49+
"types-setuptools",
5050
"ruff==0.0.231",
5151
"pre-commit",
5252
]
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
"""Version."""
2-
__version__ = "0.9.1"
2+
__version__ = "0.9.1-dev"

0 commit comments

Comments
 (0)