Skip to content

Commit 26303d3

Browse files
committed
Implement restriction selectivity estimators for <@(spoint, scircle)
This implements restriction selectivity estimation for the <@ @> !<@ !@> family of operators on spoint and scircle. The selectivity is estimated as (area of the spherical circle) / (4 pi). For queries like `select * from sky where sky.star <@ scircle(const, radius)`, the planner will be able to properly estimate whether using an index is appropriate depending on the size of radius. Secondly, a function spoint_dwithin(p1 spoint, p2 spoint, radius float8) is added that effectively returns `p1 <-> p2 <= radius`. Unlike that two-operator expression, however, it has GiST index support, so the optimizer can rewrite it to either `p1 <@ scircle(p2, radius)` or `p2 <@ scircle(p1, radius)`, i.e. it is symmetric in the first two arguments. This allows efficient matching queries without the user having to encode the join ordering in the query. On PostgreSQL 10/11, the spoint_dwithin function is created, but without the GiST support, since support functions only appeared in PG12. The file expected/selectivity_1.out is used on PG10/11; it has <@ flipped around to @> in some plans.
1 parent 1366e73 commit 26303d3

14 files changed

+1104
-8
lines changed

Makefile

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ SRC_DIR = $(shell basename $(shell pwd))
1010

1111
MODULE_big = pg_sphere
1212
OBJS = src/sscan.o src/sparse.o src/sbuffer.o src/vector3d.o src/point.o \
13-
src/euler.o src/circle.o src/line.o src/ellipse.o src/polygon.o \
14-
src/path.o src/box.o src/output.o src/gq_cache.o src/gist.o \
13+
src/euler.o src/circle.o src/circle_sel.o src/line.o src/ellipse.o src/polygon.o \
14+
src/path.o src/box.o src/output.o src/gq_cache.o src/gist.o src/gist_support.o \
1515
src/key.o src/gnomo.o src/epochprop.o src/brin.o
1616

1717
ifneq ($(USE_HEALPIX),0)
@@ -36,15 +36,15 @@ DATA_built = $(RELEASE_SQL) \
3636
DOCS = README.pg_sphere COPYRIGHT.pg_sphere
3737
REGRESS = init tables points euler circle line ellipse poly path box index \
3838
contains_ops contains_ops_compat bounding_box_gist gnomo epochprop \
39-
contains overlaps spoint_brin sbox_brin
39+
contains overlaps spoint_brin sbox_brin selectivity
4040

4141
ifneq ($(USE_HEALPIX),0)
4242
REGRESS += healpix moc mocautocast
4343
endif
4444

4545
TESTS = init_test tables points euler circle line ellipse poly path box \
4646
index contains_ops contains_ops_compat bounding_box_gist gnomo \
47-
epochprop contains overlaps spoint_brin sbox_brin
47+
epochprop contains overlaps spoint_brin sbox_brin selectivity
4848

4949
ifneq ($(USE_HEALPIX),0)
5050
TESTS += healpix moc mocautocast
@@ -67,7 +67,7 @@ CRUSH_TESTS = init_extended circle_extended
6767
PGS_SQL = pgs_types.sql pgs_point.sql pgs_euler.sql pgs_circle.sql \
6868
pgs_line.sql pgs_ellipse.sql pgs_polygon.sql pgs_path.sql \
6969
pgs_box.sql pgs_contains_ops.sql pgs_contains_ops_compat.sql \
70-
pgs_gist.sql gnomo.sql pgs_brin.sql
70+
pgs_gist.sql gnomo.sql pgs_brin.sql pgs_circle_sel.sql
7171

7272
ifneq ($(USE_HEALPIX),0)
7373
PGS_SQL += healpix.sql
@@ -110,6 +110,7 @@ healpix_bare/healpix_bare.o : healpix_bare/healpix_bare.c
110110

111111
pg_version := $(word 2,$(shell $(PG_CONFIG) --version))
112112
has_explain_summary = $(if $(filter-out 9.%,$(pg_version)),y,n)
113+
has_support_functions = $(if $(filter-out 9.% 10.% 11.%,$(pg_version)),y,n)
113114

114115
crushtest: REGRESS += $(CRUSH_TESTS)
115116
crushtest: installcheck
@@ -120,6 +121,12 @@ ifeq ($(has_explain_summary),y)
120121
endif
121122
endif
122123

124+
ifeq ($(has_support_functions),y)
125+
PGS_SQL += pgs_gist_support.sql
126+
REGRESS += gist_support
127+
TESTS += gist_support
128+
endif
129+
123130
test: pg_sphere.test.sql
124131
$(pg_regress_installcheck) --temp-instance=tmp_check $(REGRESS_OPTS) $(TESTS)
125132

@@ -241,8 +248,11 @@ pg_sphere--1.2.3--1.3.0.sql: pgs_brin.sql.in
241248
pg_sphere--1.3.0--1.3.1.sql:
242249
cat upgrade_scripts/$@.in > $@
243250

244-
pg_sphere--1.3.1--1.3.2.sql:
245-
cat upgrade_scripts/$@.in > $@
251+
ifeq ($(has_support_functions),y)
252+
pg_sphere--1.3.1--1.3.2.sql: pgs_gist_support.sql.in
253+
endif
254+
pg_sphere--1.3.1--1.3.2.sql: pgs_circle_sel.sql.in
255+
cat upgrade_scripts/$@.in $^ > $@
246256

247257
# end of local stuff
248258

doc/functions.sgm

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,39 @@
149149
</example>
150150
</sect2>
151151

152+
<sect2 id="func.spoint.dist">
153+
<title>
154+
Point distance function
155+
</title>
156+
<para>
157+
The function
158+
</para>
159+
<funcsynopsis>
160+
<funcprototype>
161+
<funcdef><function>spoint_dwithin</function></funcdef>
162+
<paramdef>spoint <parameter>p1</parameter></paramdef>
163+
<paramdef>spoint <parameter>p2</parameter></paramdef>
164+
<paramdef>float8 <parameter>radius</parameter></paramdef>
165+
</funcprototype>
166+
</funcsynopsis>
167+
<para>
168+
returns true if points <parameter>p1</parameter> and <parameter>p2</parameter>
169+
lie within distance <parameter>radius</parameter> of each other.
170+
On PostgreSQL 12 and later, the function has <literal>GIST</literal>
171+
support and the PostgreSQL optimizer will transform it to either
172+
<literal>p1 &lt;@ scircle(p2, radius)</literal> or
173+
<literal>p2 &lt;@ scircle(p1, radius)</literal> where appropriate.
174+
</para>
175+
<example>
176+
<title>
177+
Efficiently join two tables of points with some fuzziness permitted
178+
</title>
179+
<programlisting>
180+
<![CDATA[sql> SELECT * FROM stars1 JOIN stars2 ON spoint_dwithin(stars1.s, stars2.s, 1e-5);]]>
181+
</programlisting>
182+
</example>
183+
</sect2>
184+
152185
</sect1>
153186

154187
<sect1 id="funcs.strans">

expected/gist_support.out

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
-- spoint_dwithin function selectivity
2+
set jit = off; -- suppress extra planning output
3+
select explain('select * from spoint10k where spoint_dwithin(star, spoint(1,1), 1)');
4+
explain
5+
-----------------------------------------------------------------------------------------------
6+
Bitmap Heap Scan on spoint10k (rows=2298 width=16) (actual rows=3009 loops=1)
7+
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '1'::double precision)
8+
Rows Removed by Filter: 1560
9+
Heap Blocks: exact=55
10+
-> Bitmap Index Scan on spoint10k_star_idx (rows=2298 width=0) (actual rows=4569 loops=1)
11+
Index Cond: (star <@ '<(1 , 1) , 1>'::scircle)
12+
(6 rows)
13+
14+
select explain('select * from spoint10k where spoint_dwithin(star, spoint(1,1), .1)');
15+
explain
16+
-------------------------------------------------------------------------------------------
17+
Bitmap Heap Scan on spoint10k (rows=25 width=16) (actual rows=29 loops=1)
18+
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.1'::double precision)
19+
Rows Removed by Filter: 19
20+
Heap Blocks: exact=32
21+
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
22+
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
23+
(6 rows)
24+
25+
select explain('select * from spoint10k where spoint_dwithin(star, spoint(1,1), .01)');
26+
explain
27+
---------------------------------------------------------------------------------------------
28+
Index Scan using spoint10k_star_idx on spoint10k (rows=1 width=16) (actual rows=1 loops=1)
29+
Index Cond: (star <@ '<(1 , 1) , 0.01>'::scircle)
30+
(2 rows)
31+
32+
select explain('select * from spoint10k where spoint_dwithin(spoint(1,1), star, 1)');
33+
explain
34+
-----------------------------------------------------------------------------------------------
35+
Bitmap Heap Scan on spoint10k (rows=2298 width=16) (actual rows=3009 loops=1)
36+
Filter: spoint_dwithin('(1 , 1)'::spoint, star, '1'::double precision)
37+
Rows Removed by Filter: 1560
38+
Heap Blocks: exact=55
39+
-> Bitmap Index Scan on spoint10k_star_idx (rows=2298 width=0) (actual rows=4569 loops=1)
40+
Index Cond: (star <@ '<(1 , 1) , 1>'::scircle)
41+
(6 rows)
42+
43+
select explain('select * from spoint10k where spoint_dwithin(spoint(1,1), star, .1)');
44+
explain
45+
-------------------------------------------------------------------------------------------
46+
Bitmap Heap Scan on spoint10k (rows=25 width=16) (actual rows=29 loops=1)
47+
Filter: spoint_dwithin('(1 , 1)'::spoint, star, '0.1'::double precision)
48+
Rows Removed by Filter: 19
49+
Heap Blocks: exact=32
50+
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
51+
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
52+
(6 rows)
53+
54+
select explain('select * from spoint10k where spoint_dwithin(spoint(1,1), star, .01)');
55+
explain
56+
---------------------------------------------------------------------------------------------
57+
Index Scan using spoint10k_star_idx on spoint10k (rows=1 width=16) (actual rows=1 loops=1)
58+
Index Cond: (star <@ '<(1 , 1) , 0.01>'::scircle)
59+
(2 rows)
60+
61+
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, 1)', do_analyze := 'false');
62+
explain
63+
---------------------------------------------------------------------------------------
64+
Nested Loop (rows=22984885 width=32)
65+
-> Seq Scan on spoint10k a (rows=10000 width=16)
66+
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=2298 width=16)
67+
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '1'::double precision))
68+
(4 rows)
69+
70+
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .1)');
71+
explain
72+
-----------------------------------------------------------------------------------------------------------
73+
Nested Loop (rows=249792 width=32) (actual rows=505342 loops=1)
74+
-> Seq Scan on spoint10k a (rows=10000 width=16) (actual rows=10000 loops=1)
75+
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=25 width=16) (actual rows=51 loops=10000)
76+
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.1'::double precision))
77+
Rows Removed by Index Recheck: 31
78+
(5 rows)
79+
80+
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)');
81+
explain
82+
---------------------------------------------------------------------------------------------------------
83+
Nested Loop (rows=2500 width=32) (actual rows=17614 loops=1)
84+
-> Seq Scan on spoint10k a (rows=10000 width=16) (actual rows=10000 loops=1)
85+
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=2 loops=10000)
86+
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision))
87+
Rows Removed by Index Recheck: 1
88+
(5 rows)
89+
90+
-- spoint_dwithin is symmetric in the first two arguments
91+
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)
92+
where spoint_dwithin(a.star, spoint(1,1), .1)');
93+
explain
94+
------------------------------------------------------------------------------------------------------
95+
Nested Loop (rows=6 width=32) (actual rows=33 loops=1)
96+
-> Bitmap Heap Scan on spoint10k a (rows=25 width=16) (actual rows=29 loops=1)
97+
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.1'::double precision)
98+
Rows Removed by Filter: 19
99+
Heap Blocks: exact=32
100+
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
101+
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
102+
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=1 loops=29)
103+
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision))
104+
Rows Removed by Index Recheck: 0
105+
(10 rows)
106+
107+
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(b.star, a.star, .01)
108+
where spoint_dwithin(a.star, spoint(1,1), .1)');
109+
explain
110+
------------------------------------------------------------------------------------------------------
111+
Nested Loop (rows=6 width=32) (actual rows=33 loops=1)
112+
-> Bitmap Heap Scan on spoint10k a (rows=25 width=16) (actual rows=29 loops=1)
113+
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.1'::double precision)
114+
Rows Removed by Filter: 19
115+
Heap Blocks: exact=32
116+
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
117+
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
118+
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=1 loops=29)
119+
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision))
120+
Rows Removed by Index Recheck: 0
121+
(10 rows)
122+
123+
-- both sides indexable, check if the planner figures out the better choice
124+
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)
125+
where spoint_dwithin(a.star, spoint(1,1), .1) and spoint_dwithin(b.star, spoint(1,1), .05)');
126+
explain
127+
-------------------------------------------------------------------------------------------------------------------------------------
128+
Nested Loop (rows=1 width=32) (actual rows=16 loops=1)
129+
-> Bitmap Heap Scan on spoint10k b (rows=6 width=16) (actual rows=12 loops=1)
130+
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.05'::double precision)
131+
Rows Removed by Filter: 4
132+
Heap Blocks: exact=14
133+
-> Bitmap Index Scan on spoint10k_star_idx (rows=6 width=0) (actual rows=16 loops=1)
134+
Index Cond: (star <@ '<(1 , 1) , 0.05>'::scircle)
135+
-> Index Scan using spoint10k_star_idx on spoint10k a (rows=1 width=16) (actual rows=1 loops=12)
136+
Index Cond: ((star OPERATOR(public.<@) scircle(b.star, '0.01'::double precision)) AND (star <@ '<(1 , 1) , 0.1>'::scircle))
137+
Rows Removed by Index Recheck: 0
138+
(10 rows)
139+
140+
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)
141+
where spoint_dwithin(a.star, spoint(1,1), .05) and spoint_dwithin(b.star, spoint(1,1), .1)');
142+
explain
143+
-------------------------------------------------------------------------------------------------------------------------------------
144+
Nested Loop (rows=1 width=32) (actual rows=16 loops=1)
145+
-> Bitmap Heap Scan on spoint10k a (rows=6 width=16) (actual rows=12 loops=1)
146+
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.05'::double precision)
147+
Rows Removed by Filter: 4
148+
Heap Blocks: exact=14
149+
-> Bitmap Index Scan on spoint10k_star_idx (rows=6 width=0) (actual rows=16 loops=1)
150+
Index Cond: (star <@ '<(1 , 1) , 0.05>'::scircle)
151+
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=1 loops=12)
152+
Index Cond: ((star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision)) AND (star <@ '<(1 , 1) , 0.1>'::scircle))
153+
Rows Removed by Index Recheck: 0
154+
(10 rows)
155+

0 commit comments

Comments
 (0)