From ed91c70822596e59809ed3dd3b378c2dfa69a497 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 9 Sep 2025 00:16:48 +0200 Subject: [PATCH 1/4] more mvt --- src/spatial/modules/geos/geos_geometry.hpp | 44 ++ src/spatial/modules/geos/geos_module.cpp | 226 ++++++ src/spatial/modules/mvt/mvt_module.cpp | 820 ++++++++++++++++++++- 3 files changed, 1079 insertions(+), 11 deletions(-) diff --git a/src/spatial/modules/geos/geos_geometry.hpp b/src/spatial/modules/geos/geos_geometry.hpp index ea871a76..6ebf063c 100644 --- a/src/spatial/modules/geos/geos_geometry.hpp +++ b/src/spatial/modules/geos/geos_geometry.hpp @@ -51,6 +51,13 @@ class GeosGeometry { GeosGeometry get_voronoi_diagram() const; GeosGeometry get_built_area() const; GeosGeometry get_noded() const; + GeosGeometry get_clipped(double xmin, double ymin, double xmax, double ymax) const; + + // matrix format: [a, b, c, d, e, f] + // x' = a*x + b*y + e + // y' = c*x + d*y + f + GeosGeometry get_transformed(const double matrix[6]) const; + GeosGeometry get_gridded(double grid_size) const; bool contains(const GeosGeometry &other) const; bool covers(const GeosGeometry &other) const; @@ -67,6 +74,7 @@ class GeosGeometry { double distance_to(const GeosGeometry &other) const; void normalize_in_place() const; + void orient_polygons(bool ext_cw); GeosGeometry get_difference(const GeosGeometry &other) const; GeosGeometry get_intersection(const GeosGeometry &other) const; @@ -94,6 +102,10 @@ class GeosGeometry { PreparedGeosGeometry get_prepared() const; + void get_extent(double &xmin, double &ymin, double &xmax, double &ymax) const { + GEOSGeom_getExtent_r(handle, geom, &xmin, &ymin, &xmax, &ymax); + } + private: GEOSContextHandle_t handle; GEOSGeometry *geom; @@ -332,6 +344,34 @@ inline GeosGeometry GeosGeometry::get_noded() const { return GeosGeometry(handle, GEOSNode_r(handle, geom)); } +inline GeosGeometry GeosGeometry::get_clipped(double xmin, double ymin, double xmax, double ymax) const { + return 
GeosGeometry(handle, GEOSClipByRect_r(handle, geom, xmin, ymin, xmax, ymax)); +} + +inline GeosGeometry GeosGeometry::get_transformed(const double matrix[6]) const { + // x' = a*x + b*y + e + // y' = c*x + d*y + f + return GeosGeometry(handle, GEOSGeom_transformXY_r( + handle, geom, + [](double *x_ptr, double *y_ptr, void *data) -> int { + const auto m = static_cast(data); + const auto &x = *x_ptr; + const auto &y = *y_ptr; + + const auto new_x = m[0] * x + m[1] * y + m[4]; + const auto new_y = m[2] * x + m[3] * y + m[5]; + + *x_ptr = new_x; + *y_ptr = new_y; + return 1; + }, + const_cast(matrix))); +} + +inline GeosGeometry GeosGeometry::get_gridded(double grid_size) const { + return GeosGeometry(handle, GEOSGeom_setPrecision_r(handle, geom, grid_size, GEOS_PREC_NO_TOPO)); +} + inline GeosGeometry GeosGeometry::get_maximum_inscribed_circle() const { double xmin = 0; double ymin = 0; @@ -411,6 +451,10 @@ inline void GeosGeometry::normalize_in_place() const { GEOSNormalize_r(handle, geom); } +inline void GeosGeometry::orient_polygons(bool ext_cw) { + GEOSOrientPolygons_r(handle, geom, ext_cw ? 
1 : 0); +} + inline GeosGeometry GeosGeometry::get_difference(const GeosGeometry &other) const { return GeosGeometry(handle, GEOSDifference_r(handle, geom, other.geom)); } diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp index 0c0cb337..43ff68dc 100644 --- a/src/spatial/modules/geos/geos_module.cpp +++ b/src/spatial/modules/geos/geos_module.cpp @@ -7,6 +7,7 @@ #include "duckdb/common/vector_operations/senary_executor.hpp" #include "duckdb/common/vector_operations/generic_executor.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" namespace duckdb { @@ -192,6 +193,230 @@ class AsymmetricPreparedBinaryFunction { namespace { +//====================================================================================================================== +// ST_AsMVTGeom +//====================================================================================================================== + +struct ST_AsMVTGeom { + + //------------------------------------------------------------------------------------------------------------------ + // Bind + //------------------------------------------------------------------------------------------------------------------ + struct BindData final : FunctionData { + int32_t extent = 4096; + int32_t buffer = 256; + bool clip = true; + + unique_ptr Copy() const override { + auto result = make_uniq(); + result->extent = extent; + result->buffer = buffer; + result->clip = clip; + return std::move(result); + } + bool Equals(const FunctionData &other_p) const override { + auto &other = other_p.Cast(); + return extent == other.extent && buffer == other.buffer && clip == other.clip; + } + }; + + static unique_ptr Bind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + auto result = make_uniq(); + + // Extract parameters + auto folded_extent = false; + auto folded_buffer = false; + 
auto folded_clip = false; + + if (arguments.size() >= 3) { + auto &extent_expr = arguments[2]; + if (extent_expr->IsFoldable()) { + auto extent_val = ExpressionExecutor::EvaluateScalar(context, *extent_expr); + result->extent = extent_val.GetValue(); + folded_extent = true; + } else { + throw InvalidInputException("ST_AsMVTGeom: \"tile_extent\" must be a constant"); + } + } + if (arguments.size() >= 4) { + auto &buffer_expr = arguments[3]; + if (buffer_expr->IsFoldable()) { + auto buffer_val = ExpressionExecutor::EvaluateScalar(context, *buffer_expr); + result->buffer = buffer_val.GetValue(); + folded_buffer = true; + } else { + throw InvalidInputException("ST_AsMVTGeom: \"buffer\" must be a constant"); + } + } + if (arguments.size() == 5) { + auto &clip_geom_expr = arguments[4]; + if (clip_geom_expr->IsFoldable()) { + auto clip_geom_val = ExpressionExecutor::EvaluateScalar(context, *clip_geom_expr); + result->clip = clip_geom_val.GetValue(); + folded_clip = true; + } else { + throw InvalidInputException("ST_AsMVTGeom: \"clip_geom\" must be a constant"); + } + } + + // Erase back to front + if (folded_clip) { + Function::EraseArgument(bound_function, arguments, 4); + } + if (folded_buffer) { + Function::EraseArgument(bound_function, arguments, 3); + } + if (folded_extent) { + Function::EraseArgument(bound_function, arguments, 2); + } + + return std::move(result); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + + // Bind data + const auto &func_expr = state.expr.Cast(); + const auto &bind_data = func_expr.bind_info->Cast(); + + // Local state + auto &lstate = LocalState::ResetAndGet(state); + + UnifiedVectorFormat geom_format; + UnifiedVectorFormat bbox_format; + 
UnifiedVectorFormat minx_format; + UnifiedVectorFormat miny_format; + UnifiedVectorFormat maxx_format; + UnifiedVectorFormat maxy_format; + + args.data[0].ToUnifiedFormat(args.size(), geom_format); + args.data[1].ToUnifiedFormat(args.size(), bbox_format); + + const auto &bbox_parts = StructVector::GetEntries(args.data[1]); + bbox_parts[0]->ToUnifiedFormat(args.size(), minx_format); + bbox_parts[1]->ToUnifiedFormat(args.size(), miny_format); + bbox_parts[2]->ToUnifiedFormat(args.size(), maxx_format); + bbox_parts[3]->ToUnifiedFormat(args.size(), maxy_format); + + const auto geom_data = UnifiedVectorFormat::GetData(geom_format); + const auto minx_data = UnifiedVectorFormat::GetData(minx_format); + const auto miny_data = UnifiedVectorFormat::GetData(miny_format); + const auto maxx_data = UnifiedVectorFormat::GetData(maxx_format); + const auto maxy_data = UnifiedVectorFormat::GetData(maxy_format); + + const auto res_data = FlatVector::GetData(result); + + for (idx_t out_idx = 0; out_idx < args.size(); out_idx++) { + const auto geom_idx = geom_format.sel->get_index(out_idx); + const auto bbox_idx = bbox_format.sel->get_index(out_idx); + const auto minx_idx = minx_format.sel->get_index(bbox_idx); + const auto miny_idx = miny_format.sel->get_index(bbox_idx); + const auto maxx_idx = maxx_format.sel->get_index(bbox_idx); + const auto maxy_idx = maxy_format.sel->get_index(bbox_idx); + + // A NULL geometry, NULL bounds struct or NULL bounds component yields a NULL result + if (!geom_format.validity.RowIsValid(geom_idx) || !bbox_format.validity.RowIsValid(bbox_idx) || + !minx_format.validity.RowIsValid(minx_idx) || !miny_format.validity.RowIsValid(miny_idx) || + !maxx_format.validity.RowIsValid(maxx_idx) || !maxy_format.validity.RowIsValid(maxy_idx)) { + FlatVector::SetNull(result, out_idx, true); + // Skip the row: the blob and bounds of a NULL row must not be read or deserialized + continue; + } + + const auto &blob = geom_data[geom_idx]; + auto geom = lstate.Deserialize(blob); + + // Orient polygons in place + geom.orient_polygons(true); + + // Compute bounds + const auto extent = bind_data.extent; + + const auto minx = minx_data[minx_idx]; + const auto 
miny = miny_data[miny_idx]; + const auto maxx = maxx_data[maxx_idx]; + const auto maxy = maxy_data[maxy_idx]; + + const auto tile_w = maxx - minx; + const auto tile_h = maxy - miny; + + if (tile_w <= 0 || tile_h <= 0) { + throw InvalidInputException("ST_AsMVTGeom: tile width and height must be positive"); + } + + // Note: Y-axis is flipped in MVT coordinate system + const auto scale_x = extent / tile_w; + const auto scale_y = -(extent / tile_h); + + // Create transformation: translate to origin, then scale to tile extent + const double affine_matrix[6] = { + scale_x, // a: x scale + 0.0, // b: x skew + 0.0, // c: y skew + scale_y, // d: y scale (negative for flip) + -minx * scale_x, // e: x translation + -maxy * scale_y // f: y translation (with flip adjustment) + }; + + // Apply transformation + const auto transformed = geom.get_transformed(affine_matrix); + + // Snap to grid (round coordinates to integers) + const auto snapped = transformed.get_gridded(1.0); + + // Should we clip? if not, return the snapped geometry + if (!bind_data.clip) { + res_data[out_idx] = lstate.Serialize(result, snapped); + continue; + } + + // Apply buffer and clip if specified + const auto clip_minx = -bind_data.buffer; + const auto clip_miny = -bind_data.buffer; + const auto clip_maxx = extent + bind_data.buffer; + const auto clip_maxy = extent + bind_data.buffer; + + const auto clipped = snapped.get_clipped(clip_minx, clip_miny, clip_maxx, clip_maxy); + + if (clipped.is_empty()) { + FlatVector::SetNull(result, out_idx, true); + continue; + } + + // Snap again to clean up any potential issues from clipping + const auto cleaned_clipped = clipped.get_gridded(1.0); + + res_data[out_idx] = lstate.Serialize(result, cleaned_clipped); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ 
+ static void Register(ExtensionLoader &loader) { + FunctionBuilder::RegisterScalar(loader, "ST_AsMVTGeom", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("bounds", GeoTypes::BOX_2D()); + variant.AddParameter("extent", LogicalType::BIGINT); + variant.AddParameter("buffer", LogicalType::BIGINT); + variant.AddParameter("clip_geom", LogicalType::BOOLEAN); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + variant.SetBind(Bind); + }); + + func.SetDescription(R"(Returns a geometry transformed and clipped to fit within a tile boundary. + The geometry should be in the same SRS as the tile coordinates.)"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + struct ST_Boundary { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { const auto &lstate = LocalState::ResetAndGet(state); @@ -2717,6 +2942,7 @@ struct ST_CoverageInvalidEdges_Agg : GEOSCoverageAggFunction { void RegisterGEOSModule(ExtensionLoader &loader) { // Scalar Functions + ST_AsMVTGeom::Register(loader); ST_Boundary::Register(loader); ST_Buffer::Register(loader); ST_BuildArea::Register(loader); diff --git a/src/spatial/modules/mvt/mvt_module.cpp b/src/spatial/modules/mvt/mvt_module.cpp index 2b08daba..38a38690 100644 --- a/src/spatial/modules/mvt/mvt_module.cpp +++ b/src/spatial/modules/mvt/mvt_module.cpp @@ -2,18 +2,20 @@ #include "spatial/modules/mvt/mvt_module.hpp" +#include "duckdb/common/types/hash.hpp" #include "duckdb/common/vector_operations/generic_executor.hpp" #include "spatial/geometry/geometry_serialization.hpp" #include "spatial/geometry/sgl.hpp" #include "spatial/spatial_types.hpp" #include "spatial/util/function_builder.hpp" +#include "protozero/buffer_vector.hpp" +#include "protozero/basic_pbf_writer.hpp" +#include 
"spatial/util/binary_reader.hpp" + namespace duckdb { namespace { -// ###################################################################################################################### -// Util -// ###################################################################################################################### //====================================================================================================================== // LocalState @@ -58,9 +60,9 @@ string_t LocalState::Serialize(Vector &vector, const sgl::geometry &geom) { return blob; } -} // namespace - -namespace { +//====================================================================================================================== +// ST_TileEnvelope +//====================================================================================================================== struct ST_TileEnvelope { static constexpr double RADIUS = 6378137.0; @@ -89,10 +91,10 @@ struct ST_TileEnvelope { } static void validate_tile_index_arguments(uint32_t zoom_extent, int32_t tile_x, int32_t tile_y) { - if ((tile_x < 0) || ((uint32_t)tile_x >= zoom_extent)) { + if ((tile_x < 0) || (static_cast(tile_x) >= zoom_extent)) { throw InvalidInputException("ST_TileEnvelope: tile_x is out of range for specified tile_zoom"); } - if ((tile_y < 0) || ((uint32_t)tile_y >= zoom_extent)) { + if ((tile_y < 0) || (static_cast(tile_y) >= zoom_extent)) { throw InvalidInputException("ST_TileEnvelope: tile_y is out of range for specified tile_zoom"); } } @@ -167,12 +169,808 @@ struct ST_TileEnvelope { } }; +//====================================================================================================================== +// ST_AsMVT +//====================================================================================================================== +enum class MVTValueType : uint32_t { + INT = 1, + FLOAT = 2, + STRING = 3, + BOOL = 4, +}; + +struct MVTValue { + MVTValueType type; + uint32_t size; + union { + int64_t 
int_value; + double double_value; + const char *string_value; + bool bool_value; + }; +}; + +struct MVTValueEq { + bool operator()(const MVTValue &a, const MVTValue &b) const { + if (a.type != b.type) { + return false; + } + switch (a.type) { + case MVTValueType::INT: + return a.int_value == b.int_value; + case MVTValueType::FLOAT: + return a.double_value == b.double_value; + case MVTValueType::STRING: + // Strings are sized, not NUL-terminated: compare all bytes (strncmp would stop at an embedded NUL), + // which also keeps equality consistent with MVTValueHash hashing the full "size" bytes + return (a.size == b.size) && (memcmp(a.string_value, b.string_value, a.size) == 0); + case MVTValueType::BOOL: + return a.bool_value == b.bool_value; + } + return false; // Should not reach here + } +}; + +struct MVTValueHash { + size_t operator()(const MVTValue &val) const { + // Use duckdb::Hash + size_t h1 = duckdb::Hash(static_cast(val.type)); + size_t h2 = 0; + switch (val.type) { + case MVTValueType::INT: + h2 = duckdb::Hash(val.int_value); + break; + case MVTValueType::FLOAT: + h2 = duckdb::Hash(val.double_value); + break; + case MVTValueType::STRING: + h2 = duckdb::Hash(val.string_value, val.size); + break; + case MVTValueType::BOOL: + h2 = duckdb::Hash(val.bool_value); + break; + } + return h1 ^ (h2 << 1); // Combine the two hashes + } +}; + +using MVTValueDictionary = unordered_map; + +struct MVTFeature { + MVTFeature *next; + uint32_t id; + uint32_t type; + uint32_t geom_array_size; + uint32_t tags_array_size; + uint32_t *geom_array_data; + uint32_t *tags_array_keys; + MVTValue *tags_array_vals; +}; + +struct MVTLayer { + MVTFeature *features_head = nullptr; + MVTFeature *features_tail = nullptr; + + void Absorb(MVTLayer &other) { + // Append other's features to this layer + if (other.features_head) { + if (features_tail) { + features_tail->next = other.features_head; + features_tail = other.features_tail; + } else { + features_head = other.features_head; + features_tail = other.features_tail; + } + other.features_head = nullptr; + other.features_tail = nullptr; + } + } + + void Combine(ArenaAllocator &allocator, const MVTLayer &other) { + // Copy the 
features from the other into this, but reference the same values + auto other_feature = other.features_head; + while (other_feature) { + const auto new_feature_mem = allocator.AllocateAligned(sizeof(MVTFeature)); + const auto new_feature = new (new_feature_mem) MVTFeature(); + + // Copy the feature data + *new_feature = *other_feature; + + new_feature->next = nullptr; + if (features_tail) { + features_tail->next = new_feature; + features_tail = new_feature; + } else { + features_head = new_feature; + features_tail = new_feature; + } + + other_feature = other_feature->next; + } + } + + // Write the layer to the buffer + void Finalize(const uint32_t extent, const vector &tag_names, const string &layer_name, + vector &buffer, MVTValueDictionary &tag_dict) { + + protozero::basic_pbf_writer> tile_writer {buffer}; + protozero::basic_pbf_writer> layer_writer {tile_writer, 3}; // layers = 3 + + // Add version + layer_writer.add_uint32(15, 2); + + // Layer name = 1 + layer_writer.add_string(1, layer_name); + + // Add layer name + //layer_writer.add_string(1, bdata.layer_name); + + uint64_t fid = 0; + + auto feature = features_head; + while (feature) { + + protozero::basic_pbf_writer> feature_writer {layer_writer, 2}; // features = 2 + + // Id = 1 + feature_writer.add_uint64(1, fid++); + + // Tags = 2 + { + protozero::detail::packed_field_varint, uint32_t> tags_writer(feature_writer, 2); + for (uint32_t tag_idx = 0; tag_idx < feature->tags_array_size; tag_idx++) { + const auto &key_idx = feature->tags_array_keys[tag_idx]; + const auto &val = feature->tags_array_vals[tag_idx]; + + // Try to find the value in the dictionary + // If it exists, we use the existing index + // If it does not exist, we add it to the dictionary and use the newly added index + const auto val_idx = + tag_dict.insert(make_pair(val, static_cast(tag_dict.size()))).first->second; + + tags_writer.add_element(key_idx); + tags_writer.add_element(val_idx); + } + } + + // Type = 3 + 
feature_writer.add_uint32(3, feature->type); + + // Geometry = 4 + feature_writer.add_packed_uint32(4, feature->geom_array_data, + feature->geom_array_data + feature->geom_array_size); + + feature = feature->next; + } + + // Tag Keys = 3 + for (auto &key : tag_names) { + layer_writer.add_string(3, key); + } + + // Values = 4 + // Feature tags refer to values by their dictionary index, so the values must be emitted in index order; + // iterating the unordered_map directly would yield an arbitrary order. + // NOTE(review): assumes the indices in tag_dict are dense (0..size-1), as assigned on insertion above - confirm + vector<const MVTValue *> ordered_vals(tag_dict.size(), nullptr); + for (const auto &tag : tag_dict) { + ordered_vals[tag.second] = &tag.first; + } + + for (const auto val_ptr : ordered_vals) { + auto &val = *val_ptr; + protozero::basic_pbf_writer> val_writer {layer_writer, 4}; // values = 4 + // Write into the Value sub-message (not the layer), using the Value field numbers of the MVT spec + switch (val.type) { + case MVTValueType::INT: { + val_writer.add_int64(4, val.int_value); // int_value = 4 + } break; + case MVTValueType::FLOAT: { + val_writer.add_double(3, val.double_value); // double_value = 3 + } break; + case MVTValueType::STRING: { + val_writer.add_string(1, val.string_value, val.size); // string_value = 1 + } break; + case MVTValueType::BOOL: { + val_writer.add_bool(7, val.bool_value); // bool_value = 7 + } break; + default: + throw InternalException("ST_AsMVT: Unsupported MVT value type"); + } + } + + // Extent = 5 + layer_writer.add_uint32(5, extent); + } +}; + +class MVTFeatureBuilder { +public: + void Reset() { + id = 0; + geometry_type = 0; + geometry.clear(); + tags.clear(); + } + + void SetGeometry(const string_t &geom_blob) { + + BinaryReader cursor(geom_blob.GetData(), geom_blob.GetSize()); + const auto type = static_cast(cursor.Read() + 1); + const auto flags = cursor.Read(); + cursor.Skip(sizeof(uint16_t)); + cursor.Skip(sizeof(uint32_t)); // padding + + // Parse flags + const auto has_z = (flags & 0x01) != 0; + const auto has_m = (flags & 0x02) != 0; + const auto has_bbox = (flags & 0x04) != 0; + + const auto format_v1 = (flags & 0x40) != 0; + const auto format_v0 = (flags & 0x80) != 0; + + if (format_v1 || format_v0) { + // Unsupported version, throw an error + throw NotImplementedException( + "This geometry seems to be written with a newer version of the DuckDB spatial library that is not " + "compatible with this version. 
Please upgrade your DuckDB installation."); + } + + if (has_bbox) { + // Skip past bbox if present + cursor.Skip(sizeof(float) * 2 * (2 + has_z + has_m)); + } + + // Read the first type + cursor.Skip(sizeof(uint32_t)); + + const auto vertex_width = (2 + (has_z ? 1 : 0) + (has_m ? 1 : 0)) * sizeof(double); + const auto vertex_space = vertex_width - (2 * sizeof(double)); // Space for x and y + + switch (type) { + case sgl::geometry_type::POINT: { + geometry_type = 1; // MVT_POINT + + // Read the point geometry + const auto vertex_count = cursor.Read(); + if (vertex_count == 0) { + // No vertices, skip + throw InvalidInputException("ST_AsMVT: POINT geometry cant be empty"); + } + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + geometry.push_back(protozero::encode_zigzag32(x)); + geometry.push_back(protozero::encode_zigzag32(y)); + + } break; + case sgl::geometry_type::LINESTRING: { + geometry_type = 2; // MVT_LINESTRING + + const auto vertex_count = cursor.Read(); + if (vertex_count < 2) { + // Invalid linestring, skip + throw InvalidInputException("ST_AsMVT: LINESTRING geometry cant contain less than 2 vertices"); + } + // Read the vertices + int32_t cursor_x = 0; + int32_t cursor_y = 0; + + for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { + + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + if (vertex_idx == 0) { + geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + geometry.push_back((2 & 0x7) | ((vertex_count - 1) << 3)); // LineTo, part count + } else { + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + 
geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + } + + cursor_x = x; + cursor_y = y; + } + } break; + case sgl::geometry_type::POLYGON: { + geometry_type = 3; // MVT_POLYGON + + const auto part_count = cursor.Read(); + if (part_count == 0) { + // No parts, invalid + throw InvalidInputException("ST_AsMVT: POLYGON geometry cant be empty"); + } + + int32_t cursor_x = 0; + int32_t cursor_y = 0; + + auto ring_cursor = cursor; + cursor.Skip((part_count * 4) + (part_count % 2 == 1 ? 4 : 0)); // Skip part types and padding + for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { + const auto vertex_count = ring_cursor.Read(); + if (vertex_count < 3) { + // Invalid polygon, skip + throw InvalidInputException("ST_AsMVT: POLYGON ring cant contain less than 3 vertices"); + } + + for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + if (vertex_idx == 0) { + geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + geometry.push_back((2 & 0x7) | ((vertex_count - 2) << 3)); + + cursor_x = x; + cursor_y = y; + + } else if (vertex_idx == vertex_count - 1) { + // Close the ring + geometry.push_back((7 & 0x7) | (1 << 3)); // ClosePath + } else { + // Add the vertex + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + + cursor_x = x; + cursor_y = y; + } + } + } + } break; + case sgl::geometry_type::MULTI_POINT: { + geometry_type = 1; // MVT_POINT + + const auto part_count = cursor.Read(); + if (part_count == 0) { + throw InvalidInputException("ST_AsMVT: MULTI_POINT geometry cant be empty"); + } + + int32_t cursor_x = 0; + int32_t cursor_y = 0; + + geometry.push_back((1 & 0x7) | (part_count 
<< 3)); // MoveTo, part count + + // Read the parts + for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { + cursor.Skip(sizeof(uint32_t)); // Skip part type + const auto vertex_count = cursor.Read(); + if (vertex_count == 0) { + // No vertices, skip + throw InvalidInputException("ST_AsMVT: POINT geometry cant be empty"); + } + + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + + cursor_x = x; + cursor_y = y; + } + } break; + case sgl::geometry_type::MULTI_LINESTRING: { + geometry_type = 2; // MVT_LINESTRING + + // Read the multi-linestring geometry + const auto part_count = cursor.Read(); + if (part_count == 0) { + // No parts, invalid + throw InvalidInputException("ST_AsMVT: MULTI_LINESTRING geometry cant be empty"); + } + int32_t cursor_x = 0; + int32_t cursor_y = 0; + + for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { + cursor.Skip(sizeof(uint32_t)); // Skip part type + const auto vertex_count = cursor.Read(); + + if (vertex_count < 2) { + // Invalid linestring, reject + throw InvalidInputException("ST_AsMVT: LINESTRING geometry cant contain less than 2 vertices"); + } + + for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { + + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + if (vertex_idx == 0) { + geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + // Every vertex after the first is a LineTo parameter pair, so the command count is + // vertex_count - 1 (same as the single LINESTRING case) + geometry.push_back((2 & 0x7) | ((vertex_count - 1) << 3)); // LineTo, part count + } else { + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - 
cursor_y)); + } + + cursor_x = x; + cursor_y = y; + } + } + } break; + case sgl::geometry_type::MULTI_POLYGON: { + geometry_type = 3; // MVT_POLYGON + + // Read the multi-linestring geometry + const auto poly_count = cursor.Read(); + if (poly_count == 0) { + // No parts, invalid + throw InvalidInputException("ST_AsMVT: MULTI_POLYGON geometry cant be empty"); + } + + int32_t cursor_x = 0; + int32_t cursor_y = 0; + + for (uint32_t poly_idx = 0; poly_idx < poly_count; poly_idx++) { + cursor.Skip(sizeof(uint32_t)); // Skip part type + const auto part_count = cursor.Read(); + if (part_count == 0) { + // No parts, invalid + throw InvalidInputException("ST_AsMVT: POLYGON geometry cant be empty"); + } + + auto ring_cursor = cursor; + cursor.Skip((part_count * 4) + (part_count % 2 == 1 ? 4 : 0)); // Skip part types and padding + + for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { + const auto vertex_count = ring_cursor.Read(); + if (vertex_count < 3) { + // Invalid polygon, skip + throw InvalidInputException("ST_AsMVT: POLYGON ring cant contain less than 3 vertices"); + } + + for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { + const auto x = CastDouble(cursor.Read()); + const auto y = CastDouble(cursor.Read()); + cursor.Skip(vertex_space); // Skip z and m if present + + if (vertex_idx == 0) { + geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + geometry.push_back((2 & 0x7) | ((vertex_count - 2) << 3)); + + cursor_x = x; + cursor_y = y; + + } else if (vertex_idx == vertex_count - 1) { + // Close the ring + geometry.push_back((7 & 0x7) | (1 << 3)); // ClosePath + } else { + // Add the vertex + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + + cursor_x = x; + cursor_y = y; + } + } + } + } + } break; + default: + throw 
InvalidInputException("ST_AsMVT: unsupported geometry type %d", static_cast(type)); + } + } + + void AddProperty(uint32_t key, const string_t &value) { + + MVTValue v; + v.type = MVTValueType::STRING; + v.size = static_cast(value.GetSize()); + v.string_value = value.GetData(); + + tags.emplace_back(key, v); + } + + void AddProperty(uint32_t key, int64_t value) { + MVTValue v; + v.type = MVTValueType::INT; + v.size = sizeof(int64_t); + v.int_value = value; + + tags.emplace_back(key, v); + } + + bool IsEmpty() const { + return geometry.empty(); + } + + void Finalize(ArenaAllocator &arena, MVTLayer &layer) { + if (geometry.empty()) { + // No geometry, skip + return; + } + + const auto feature_mem = arena.AllocateAligned(sizeof(MVTFeature)); + const auto feature_ptr = new (feature_mem) MVTFeature(); + + feature_ptr->next = nullptr; + feature_ptr->id = id; + feature_ptr->type = geometry_type; + + // Copy over the geometry data + feature_ptr->geom_array_data = + reinterpret_cast(arena.AllocateAligned(geometry.size() * sizeof(uint32_t))); + feature_ptr->geom_array_size = static_cast(geometry.size()); + memcpy(feature_ptr->geom_array_data, geometry.data(), geometry.size() * sizeof(uint32_t)); + + // Copy over the tags + feature_ptr->tags_array_size = static_cast(tags.size()); + if (feature_ptr->tags_array_size != 0) { + + feature_ptr->tags_array_keys = + reinterpret_cast(arena.AllocateAligned(feature_ptr->tags_array_size * sizeof(uint32_t))); + feature_ptr->tags_array_vals = + reinterpret_cast(arena.AllocateAligned(feature_ptr->tags_array_size * sizeof(MVTValue))); + + for (idx_t i = 0; i < tags.size(); i++) { + feature_ptr->tags_array_keys[i] = tags[i].first; + feature_ptr->tags_array_vals[i] = tags[i].second; + } + } + + // Append to the layer + if (layer.features_tail) { + layer.features_tail->next = feature_ptr; + layer.features_tail = feature_ptr; + } else { + layer.features_head = feature_ptr; + layer.features_tail = feature_ptr; + } + } + +private: + static int32_t 
CastDouble(double d) { + // Written as a negated in-range test so that NaN (which fails every comparison) is rejected too, + // instead of reaching the integer cast below + if (!(d >= static_cast(std::numeric_limits::min()) && + d <= static_cast(std::numeric_limits::max()))) { + throw InvalidInputException("ST_AsMVT: coordinate out of range for int32_t"); + } + return static_cast(d); + } + + uint32_t id = 0; + uint32_t geometry_type = 0; + vector geometry; + vector> tags; +}; + +struct ST_AsMVT { + + //------------------------------------------------------------------------------------------------------------------ + // Bind + //------------------------------------------------------------------------------------------------------------------ + struct BindData final : FunctionData { + + idx_t geometry_column_idx = 0; + string layer_name = "layer"; + uint32_t extent = 4096; + vector tag_names; + + unique_ptr Copy() const override { + auto result = make_uniq(); + // Copy every field so that the copy is equivalent to the original bind data + result->geometry_column_idx = geometry_column_idx; + result->layer_name = layer_name; + result->extent = extent; + result->tag_names = tag_names; + return std::move(result); + } + + bool Equals(const FunctionData &other_p) const override { + auto &other = other_p.Cast(); + // Two bind data objects are only interchangeable if all of their settings match + return geometry_column_idx == other.geometry_column_idx && layer_name == other.layer_name && + extent == other.extent && tag_names == other.tag_names; + } + }; + + static unique_ptr Bind(ClientContext &context, AggregateFunction &function, + vector> &arguments) { + auto result = make_uniq(); + + string geom_name; + + // Figure out which part of the row is the geometry column + const auto &row_type = arguments[0]->return_type; + if (row_type.id() != LogicalTypeId::STRUCT) { + throw InvalidInputException("ST_AsMVT: first argument must be a STRUCT (i.e. 
a row type)"); + } + + optional_idx geom_idx = optional_idx::Invalid(); + + if (geom_name.empty()) { + // Look for the first geometry column + for (idx_t i = 0; i < StructType::GetChildCount(row_type); i++) { + auto &child = StructType::GetChildType(row_type, i); + if (child == GeoTypes::GEOMETRY()) { + if (geom_idx != optional_idx::Invalid()) { + throw InvalidInputException("ST_AsMVT: only one geometry column is allowed in the input row"); + } + geom_idx = i; + } + } + } else { + // Look for the geometry column by name + for (idx_t i = 0; i < StructType::GetChildCount(row_type); i++) { + auto &child = StructType::GetChildType(row_type, i); + auto &child_name = StructType::GetChildName(row_type, i); + if (child == GeoTypes::GEOMETRY() && child_name == geom_name) { + if (geom_idx != optional_idx::Invalid()) { + throw InvalidInputException("ST_AsMVT: only one geometry column is allowed in the input row"); + } + geom_idx = i; + } + } + } + if (!geom_idx.IsValid()) { + throw InvalidInputException("ST_AsMVT: input row must contain a geometry column"); + } + + result->geometry_column_idx = geom_idx.GetIndex(); + + return std::move(result); + } + + //------------------------------------------------------------------------------------------------------------------ + // Initialize + //------------------------------------------------------------------------------------------------------------------ + struct State { + MVTLayer layer; + }; + + static idx_t StateSize(const AggregateFunction &) { + return sizeof(State); + } + + static void Initialize(const AggregateFunction &, data_ptr_t state_mem) { + new (state_mem) State(); + } + + //------------------------------------------------------------------------------------------------------------------ + // Update + //------------------------------------------------------------------------------------------------------------------ + static void Update(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &state_vec, idx_t count) 
{ + const auto &bdata = aggr.bind_data->Cast(); + const auto &row_cols = StructVector::GetEntries(inputs[0]); + + UnifiedVectorFormat state_format; + UnifiedVectorFormat geom_format; + vector property_formats; + vector property_types; + + state_vec.ToUnifiedFormat(count, state_format); + + for (idx_t col_idx = 0; col_idx < row_cols.size(); col_idx++) { + if (col_idx == bdata.geometry_column_idx) { + row_cols[col_idx]->ToUnifiedFormat(count, geom_format); + } else { + property_formats.emplace_back(); + row_cols[col_idx]->ToUnifiedFormat(count, property_formats.back()); + property_types.push_back(row_cols[col_idx]->GetType()); + } + } + + // Reusable geometry buffer + MVTFeatureBuilder feature; + + for (idx_t row_idx = 0; row_idx < count; row_idx++) { + const auto state_idx = state_format.sel->get_index(row_idx); + auto &layer = UnifiedVectorFormat::GetData(state_format)[state_idx]->layer; + + const auto geom_idx = geom_format.sel->get_index(row_idx); + if (!geom_format.validity.RowIsValid(geom_idx)) { + // Skip if geometry is NULL + continue; + } + + auto &geom_blob = UnifiedVectorFormat::GetData(geom_format)[geom_idx]; + + // Reset the feature + feature.Reset(); + + // Set geometry + feature.SetGeometry(geom_blob); + + // Add properties + for (idx_t prop_vec_idx = 0; prop_vec_idx < property_formats.size(); prop_vec_idx++) { + const auto &prop_format = property_formats[prop_vec_idx]; + const auto prop_row_idx = prop_format.sel->get_index(row_idx); + if (!prop_format.validity.RowIsValid(prop_row_idx)) { + // Skip if property is NULL + continue; + } + + // Switch on property type + auto &prop_type = property_types[prop_vec_idx]; + switch (prop_type.id()) { + case LogicalTypeId::VARCHAR: { + auto &prop_val = UnifiedVectorFormat::GetData(prop_format)[prop_row_idx]; + feature.AddProperty(static_cast(prop_vec_idx), prop_val); + } break; + case LogicalTypeId::BIGINT: { + auto &prop_val = UnifiedVectorFormat::GetData(prop_format)[prop_row_idx]; + 
feature.AddProperty(static_cast(prop_vec_idx), prop_val); + + } break; + default: + throw InvalidInputException("ST_AsMVT: unsupported property type: %s", prop_type.ToString()); + } + } + + feature.Finalize(aggr.allocator, layer); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Combine + //------------------------------------------------------------------------------------------------------------------ + static void Combine(Vector &source_vec, Vector &target_vec, AggregateInputData &aggr, idx_t count) { + UnifiedVectorFormat source_format; + source_vec.ToUnifiedFormat(count, source_format); + + const auto source_ptr = UnifiedVectorFormat::GetData(source_format); + const auto target_ptr = FlatVector::GetData(target_vec); + + for (idx_t row_idx = 0; row_idx < count; row_idx++) { + auto &source = *source_ptr[source_format.sel->get_index(row_idx)]; + auto &target = *target_ptr[row_idx]; + + if (aggr.combine_type == AggregateCombineType::ALLOW_DESTRUCTIVE) { + // Absorb the feature data from source into target + target.layer.Absorb(source.layer); + } else { + // Append the feature data from source to target + target.layer.Combine(aggr.allocator, source.layer); + } + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Finalize + //------------------------------------------------------------------------------------------------------------------ + static void Finalize(Vector &state_vec, AggregateInputData &aggr, Vector &result, idx_t count, idx_t offset) { + const auto &bdata = aggr.bind_data->Cast(); + + UnifiedVectorFormat state_format; + state_vec.ToUnifiedFormat(count, state_format); + const auto state_ptr = UnifiedVectorFormat::GetData(state_format); + + vector buffer; + MVTValueDictionary tag_dict; + + for (idx_t raw_idx = 0; raw_idx < count; raw_idx++) { + auto &state = 
*state_ptr[state_format.sel->get_index(raw_idx)]; + const auto out_idx = raw_idx + offset; + + buffer.clear(); + tag_dict.clear(); + + state.layer.Finalize(bdata.extent, bdata.tag_names, bdata.layer_name, buffer, tag_dict); + + // Now we have the layer buffer, we can write it to the result vector + const auto result_data = FlatVector::GetData(result); + result_data[out_idx] = StringVector::AddStringOrBlob(result, buffer.data(), buffer.size()); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(ExtensionLoader &loader) { + AggregateFunction agg({LogicalTypeId::ANY}, LogicalType::BLOB, StateSize, Initialize, Update, Combine, Finalize, + nullptr, Bind); + + FunctionBuilder::RegisterAggregate(loader, "ST_AsMVT", [&](AggregateFunctionBuilder &func) { + func.SetFunction(agg); + func.SetDescription("Makes a vector tile from a set of geometries"); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + } // namespace -//------------------------------------------------------------------------------ +//====================================================================================================================== // Register -//------------------------------------------------------------------------------ +//====================================================================================================================== void RegisterMapboxVectorTileModule(ExtensionLoader &loader) { ST_TileEnvelope::Register(loader); -}; + ST_AsMVT::Register(loader); +} } // namespace duckdb From 6e1f7074016ad6c6683474c27f61615190ead775 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 10 Sep 2025 15:37:45 +0200 Subject: [PATCH 2/4] add ST_AsMVT --- duckdb | 2 +- src/spatial/modules/geos/geos_module.cpp | 
30 ++ src/spatial/modules/mvt/mvt_module.cpp | 426 ++++++++++++++++++++--- test/sql/mvt/st_asmvt.test | 119 +++++++ 4 files changed, 524 insertions(+), 53 deletions(-) create mode 100644 test/sql/mvt/st_asmvt.test diff --git a/duckdb b/duckdb index 2ed9bf88..47993080 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 2ed9bf887f61a0ac226ab8c8f1164601d985d607 +Subproject commit 4799308087583835a7731a266262ba0fcac9af08 diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp index 43ff68dc..843976b3 100644 --- a/src/spatial/modules/geos/geos_module.cpp +++ b/src/spatial/modules/geos/geos_module.cpp @@ -408,6 +408,36 @@ struct ST_AsMVTGeom { variant.SetFunction(Execute); variant.SetBind(Bind); }); + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("bounds", GeoTypes::BOX_2D()); + variant.AddParameter("extent", LogicalType::BIGINT); + variant.AddParameter("buffer", LogicalType::BIGINT); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + variant.SetBind(Bind); + }); + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("bounds", GeoTypes::BOX_2D()); + variant.AddParameter("extent", LogicalType::BIGINT); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + variant.SetBind(Bind); + }); + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("bounds", GeoTypes::BOX_2D()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + variant.SetBind(Bind); + }); func.SetDescription(R"(Returns a geometry transformed and clipped to fit within a tile boundary. 
The geometry should be in the same SRS as the tile coordinates.)"); diff --git a/src/spatial/modules/mvt/mvt_module.cpp b/src/spatial/modules/mvt/mvt_module.cpp index 38a38690..fc355dd1 100644 --- a/src/spatial/modules/mvt/mvt_module.cpp +++ b/src/spatial/modules/mvt/mvt_module.cpp @@ -173,19 +173,21 @@ struct ST_TileEnvelope { // ST_AsMVT //====================================================================================================================== enum class MVTValueType : uint32_t { - INT = 1, + STRING = 1, FLOAT = 2, - STRING = 3, - BOOL = 4, + DOUBLE = 3, + INT = 4, + BOOL = 7, }; struct MVTValue { MVTValueType type; uint32_t size; union { - int64_t int_value; - double double_value; const char *string_value; + float float_value; + double double_value; + int64_t int_value; bool bool_value; }; }; @@ -196,12 +198,14 @@ struct MVTValueEq { return false; } switch (a.type) { - case MVTValueType::INT: - return a.int_value == b.int_value; - case MVTValueType::FLOAT: - return a.double_value == b.double_value; case MVTValueType::STRING: return (a.size == b.size) && (strncmp(a.string_value, b.string_value, a.size) == 0); + case MVTValueType::FLOAT: + return a.float_value == b.float_value; + case MVTValueType::DOUBLE: + return a.double_value == b.double_value; + case MVTValueType::INT: + return a.int_value == b.int_value; case MVTValueType::BOOL: return a.bool_value == b.bool_value; } @@ -215,14 +219,17 @@ struct MVTValueHash { size_t h1 = duckdb::Hash(static_cast(val.type)); size_t h2 = 0; switch (val.type) { - case MVTValueType::INT: - h2 = duckdb::Hash(val.int_value); + case MVTValueType::STRING: + h2 = duckdb::Hash(val.string_value, val.size); break; case MVTValueType::FLOAT: + h2 = duckdb::Hash(val.float_value); + break; + case MVTValueType::DOUBLE: h2 = duckdb::Hash(val.double_value); break; - case MVTValueType::STRING: - h2 = duckdb::Hash(val.string_value, val.size); + case MVTValueType::INT: + h2 = duckdb::Hash(val.int_value); break; case 
MVTValueType::BOOL: h2 = duckdb::Hash(val.bool_value); @@ -232,11 +239,34 @@ struct MVTValueHash { } }; -using MVTValueDictionary = unordered_map; +class MVTValueSet { +public: + void Clear() { + map.clear(); + vec.clear(); + } + uint32_t Insert(const MVTValue &val) { + const auto it = map.insert(make_pair(val, static_cast(map.size()))); + if (it.second) { + // New entry, add it to the order vector + vec.emplace_back(it.first->first); + } + return it.first->second; + } + + vector> &GetOrderedValues() { + return vec; + } + +private: + // Unordered map is pointer-stable, so we can store references in the order vector + unordered_map map; + vector> vec; +}; struct MVTFeature { MVTFeature *next; - uint32_t id; + int32_t id; // Optional feature id, -1 if not set uint32_t type; uint32_t geom_array_size; uint32_t tags_array_size; @@ -289,7 +319,7 @@ struct MVTLayer { // Write the layer to the buffer void Finalize(const uint32_t extent, const vector &tag_names, const string &layer_name, - vector &buffer, MVTValueDictionary &tag_dict) { + vector &buffer, MVTValueSet &tag_dict) { protozero::basic_pbf_writer> tile_writer {buffer}; protozero::basic_pbf_writer> layer_writer {tile_writer, 3}; // layers = 3 @@ -300,18 +330,16 @@ struct MVTLayer { // Layer name = 1 layer_writer.add_string(1, layer_name); - // Add layer name - //layer_writer.add_string(1, bdata.layer_name); - - uint64_t fid = 0; - auto feature = features_head; while (feature) { protozero::basic_pbf_writer> feature_writer {layer_writer, 2}; // features = 2 // Id = 1 - feature_writer.add_uint64(1, fid++); + if (feature->id >= 0) { + // Only write if the id is set (not negative) + feature_writer.add_uint64(1, feature->id); + } // Tags = 2 { @@ -323,8 +351,7 @@ struct MVTLayer { // Try to find the value in the dictionary // If it exists, we use the existing index // If it does not exist, we add it to the dictionary and use the newly added index - const auto val_idx = - tag_dict.insert(make_pair(val, 
static_cast(tag_dict.size()))).first->second; + const auto val_idx = tag_dict.Insert(val); tags_writer.add_element(key_idx); tags_writer.add_element(val_idx); @@ -346,19 +373,25 @@ struct MVTLayer { layer_writer.add_string(3, key); } - for (const auto &tag : tag_dict) { - auto &val = tag.first; + for (const auto &tag : tag_dict.GetOrderedValues()) { + auto &val = tag.get(); protozero::basic_pbf_writer> val_writer {layer_writer, 4}; // values = 4 switch (val.type) { - case MVTValueType::INT: { + case MVTValueType::STRING: + val_writer.add_string(1, val.string_value, val.size); + break; + case MVTValueType::FLOAT: + val_writer.add_float(2, val.float_value); + break; + case MVTValueType::DOUBLE: + val_writer.add_double(3, val.double_value); + break; + case MVTValueType::INT: val_writer.add_int64(4, val.int_value); - } break; - case MVTValueType::FLOAT: { - layer_writer.add_double(3, val.double_value); - } break; - case MVTValueType::STRING: { - layer_writer.add_string(1, val.string_value, val.size); - } break; + break; + case MVTValueType::BOOL: + val_writer.add_bool(7, val.bool_value); + break; default: throw InternalException("ST_AsMVT: Unsupported MVT value type"); } @@ -372,12 +405,16 @@ struct MVTLayer { class MVTFeatureBuilder { public: void Reset() { - id = 0; + id = -1; geometry_type = 0; geometry.clear(); tags.clear(); } + void SetId(int32_t value) { + id = value; + } + void SetGeometry(const string_t &geom_blob) { BinaryReader cursor(geom_blob.GetData(), geom_blob.GetSize()); @@ -647,17 +684,49 @@ class MVTFeatureBuilder { } } } break; + case sgl::geometry_type::GEOMETRY_COLLECTION: { + throw InvalidInputException("ST_AsMVT: Geometries of type \"GEOMETRYCOLLECTION\" are not supported"); + } break; default: throw InvalidInputException("ST_AsMVT: unsupported geometry type %d", static_cast(type)); } } - void AddProperty(uint32_t key, const string_t &value) { + void AddProperty(idx_t key, const string_t &value, ArenaAllocator &allocator) { + // We need to copy 
the string into the arena, as the input string might be temporary MVTValue v; v.type = MVTValueType::STRING; v.size = static_cast(value.GetSize()); - v.string_value = value.GetData(); + + if (value.GetSize() != 0) { + const auto str_mem = allocator.Allocate(value.GetSize()); + memcpy(str_mem, value.GetData(), value.GetSize()); + v.string_value = const_char_ptr_cast(str_mem); + } + + tags.emplace_back(static_cast(key), v); + } + + void AddProperty(uint32_t key, float value) { + MVTValue v; + v.type = MVTValueType::FLOAT; + v.float_value = value; + + tags.emplace_back(key, v); + } + + void AddProperty(uint32_t key, double value) { + MVTValue v; + v.type = MVTValueType::DOUBLE; + v.double_value = value; + tags.emplace_back(key, v); + } + + void AddProperty(uint32_t key, bool value) { + MVTValue v; + v.type = MVTValueType::BOOL; + v.bool_value = value; tags.emplace_back(key, v); } @@ -665,12 +734,15 @@ class MVTFeatureBuilder { void AddProperty(uint32_t key, int64_t value) { MVTValue v; v.type = MVTValueType::INT; - v.size = sizeof(int64_t); v.int_value = value; tags.emplace_back(key, v); } + void AddProperty(uint32_t key, int32_t value) { + AddProperty(key, static_cast(value)); + } + bool IsEmpty() const { return geometry.empty(); } @@ -728,7 +800,7 @@ class MVTFeatureBuilder { return static_cast(d); } - uint32_t id = 0; + int32_t id = -1; uint32_t geometry_type = 0; vector geometry; vector> tags; @@ -743,18 +815,25 @@ struct ST_AsMVT { idx_t geometry_column_idx = 0; string layer_name = "layer"; - uint32_t extent = 4096; + int32_t extent = 4096; vector tag_names; + optional_idx feature_id_column_idx = optional_idx::Invalid(); unique_ptr Copy() const override { auto result = make_uniq(); result->geometry_column_idx = geometry_column_idx; + result->layer_name = layer_name; + result->extent = extent; + result->tag_names = tag_names; + result->feature_id_column_idx = feature_id_column_idx; return std::move(result); } bool Equals(const FunctionData &other_p) const override 
{ auto &other = other_p.Cast(); - return geometry_column_idx == other.geometry_column_idx; + return geometry_column_idx == other.geometry_column_idx && layer_name == other.layer_name && + extent == other.extent && tag_names == other.tag_names && + feature_id_column_idx == other.feature_id_column_idx; } }; @@ -762,16 +841,86 @@ struct ST_AsMVT { vector> &arguments) { auto result = make_uniq(); - string geom_name; - // Figure part of the row is the geometry column const auto &row_type = arguments[0]->return_type; if (row_type.id() != LogicalTypeId::STRUCT) { throw InvalidInputException("ST_AsMVT: first argument must be a STRUCT (i.e. a row type)"); } - optional_idx geom_idx = optional_idx::Invalid(); + // Fold all the other parameters + auto folded_layer = false; + auto folded_extent = false; + auto folded_geom = false; + auto folded_feature = false; + + if (arguments.size() >= 2) { + auto &layer_expr = arguments[1]; + if (layer_expr->IsFoldable()) { + auto layer_val = ExpressionExecutor::EvaluateScalar(context, *layer_expr); + if (!layer_val.IsNull()) { + result->layer_name = StringValue::Get(layer_val); + if (result->layer_name.empty()) { + throw InvalidInputException("ST_AsMVT: layer name cannot be empty"); + } + } + folded_layer = true; + } else { + throw InvalidInputException("ST_AsMVT: layer name must be a constant string"); + } + } + + if (arguments.size() >= 3) { + auto &extent_expr = arguments[2]; + if (extent_expr->IsFoldable()) { + auto extent_val = ExpressionExecutor::EvaluateScalar(context, *extent_expr); + if (extent_val.IsNull()) { + throw InvalidInputException("ST_AsMVT: extent cannot be NULL"); + } + result->extent = IntegerValue::Get(extent_val); + if (result->extent == 0) { + throw InvalidInputException("ST_AsMVT: extent must be greater than zero"); + } + folded_extent = true; + } else { + throw InvalidInputException("ST_AsMVT: extent must be a constant integer"); + } + } + string geom_name; + if (arguments.size() >= 4) { + auto &geom_expr = 
arguments[3]; + if (geom_expr->IsFoldable()) { + auto geom_val = ExpressionExecutor::EvaluateScalar(context, *geom_expr); + if (!geom_val.IsNull()) { + geom_name = StringValue::Get(geom_val); + if (geom_name.empty()) { + throw InvalidInputException("ST_AsMVT: geometry column name cannot be empty"); + } + } + folded_geom = true; + } else { + throw InvalidInputException("ST_AsMVT: geometry column name must be a constant string"); + } + } + + string feature_id_name; + if (arguments.size() >= 5) { + auto &feature_expr = arguments[4]; + if (feature_expr->IsFoldable()) { + auto feature_val = ExpressionExecutor::EvaluateScalar(context, *feature_expr); + if (!feature_val.IsNull()) { + feature_id_name = StringValue::Get(feature_val); + if (feature_id_name.empty()) { + throw InvalidInputException("ST_AsMVT: feature id column name cannot be empty"); + } + } + folded_feature = true; + } else { + throw InvalidInputException("ST_AsMVT: feature id column name must be a constant string"); + } + } + // Fetch the geometry column index, either based on name or on position + optional_idx geom_idx = optional_idx::Invalid(); if (geom_name.empty()) { // Look for the first geometry column for (idx_t i = 0; i < StructType::GetChildCount(row_type); i++) { @@ -802,6 +951,63 @@ struct ST_AsMVT { result->geometry_column_idx = geom_idx.GetIndex(); + // Fetch the feature id column index, based on name if provided + if (!feature_id_name.empty()) { + // Look for the feature id column by name + for (idx_t i = 0; i < StructType::GetChildCount(row_type); i++) { + auto &child_name = StructType::GetChildName(row_type, i); + if (child_name == feature_id_name) { + if (result->feature_id_column_idx.IsValid()) { + throw InvalidInputException("ST_AsMVT: only one feature id column is allowed in the input row"); + } + auto &child_type = StructType::GetChildType(row_type, i); + if (child_type != LogicalTypeId::INTEGER && child_type != LogicalTypeId::BIGINT) { + throw InvalidInputException("ST_AsMVT: feature id 
column must be of type INTEGER or BIGINT"); + } + result->feature_id_column_idx = i; + } + } + if (!result->feature_id_column_idx.IsValid()) { + throw InvalidInputException("ST_AsMVT: feature id column not found in input row"); + } + } + + unordered_set valid_property_types = {LogicalTypeId::VARCHAR, LogicalTypeId::FLOAT, + LogicalTypeId::DOUBLE, LogicalTypeId::INTEGER, + LogicalTypeId::BIGINT, LogicalTypeId::BOOLEAN}; + + // Collect tag names + for (idx_t i = 0; i < StructType::GetChildCount(row_type); i++) { + if (i != result->geometry_column_idx && + (!result->feature_id_column_idx.IsValid() || i != result->feature_id_column_idx.GetIndex())) { + auto &name = StructType::GetChildName(row_type, i); + auto &type = StructType::GetChildType(row_type, i); + + if (valid_property_types.find(type.id()) == valid_property_types.end()) { + auto type_name = type.ToString(); + throw InvalidInputException("ST_AsMVT: property column \"%s\" has unsupported type \"%s\"\n" + "Only the following property types are supported: VARCHAR, FLOAT, " + "DOUBLE, INTEGER, BIGINT, BOOLEAN", + name.c_str(), type_name.c_str()); + } + result->tag_names.push_back(name); + } + } + + // Erase arguments, back to front + if (folded_feature) { + Function::EraseArgument(function, arguments, 4); + } + if (folded_geom) { + Function::EraseArgument(function, arguments, 3); + } + if (folded_extent) { + Function::EraseArgument(function, arguments, 2); + } + if (folded_layer) { + Function::EraseArgument(function, arguments, 1); + } + return std::move(result); } @@ -829,6 +1035,9 @@ struct ST_AsMVT { UnifiedVectorFormat state_format; UnifiedVectorFormat geom_format; + UnifiedVectorFormat fid_format; + LogicalType fid_type; + vector property_formats; vector property_types; @@ -837,6 +1046,9 @@ struct ST_AsMVT { for (idx_t col_idx = 0; col_idx < row_cols.size(); col_idx++) { if (col_idx == bdata.geometry_column_idx) { row_cols[col_idx]->ToUnifiedFormat(count, geom_format); + } else if 
(bdata.feature_id_column_idx.IsValid() && col_idx == bdata.feature_id_column_idx.GetIndex()) { + row_cols[col_idx]->ToUnifiedFormat(count, fid_format); + fid_type = row_cols[col_idx]->GetType(); } else { property_formats.emplace_back(); row_cols[col_idx]->ToUnifiedFormat(count, property_formats.back()); @@ -865,6 +1077,43 @@ struct ST_AsMVT { // Set geometry feature.SetGeometry(geom_blob); + if (feature.IsEmpty()) { + // No geometry, skip + continue; + } + + // Do we have a feature id? + if (bdata.feature_id_column_idx.IsValid()) { + const auto fid_idx = fid_format.sel->get_index(row_idx); + if (fid_format.validity.RowIsValid(fid_idx)) { + // Set the feature id + switch (fid_type.id()) { + case LogicalTypeId::TINYINT: { + auto &fid_val = UnifiedVectorFormat::GetData(fid_format)[fid_idx]; + feature.SetId(fid_val); + } break; + case LogicalTypeId::SMALLINT: { + auto &fid_val = UnifiedVectorFormat::GetData(fid_format)[fid_idx]; + feature.SetId(fid_val); + } + case LogicalTypeId::INTEGER: { + auto &fid_val = UnifiedVectorFormat::GetData(fid_format)[fid_idx]; + feature.SetId(fid_val); + } break; + case LogicalTypeId::BIGINT: { + auto &fid_val = UnifiedVectorFormat::GetData(fid_format)[fid_idx]; + if (fid_val < std::numeric_limits::min() || + fid_val > std::numeric_limits::max()) { + throw InvalidInputException("ST_AsMVT: feature id out of range for int32"); + } + feature.SetId(static_cast(fid_val)); + } break; + default: + throw InvalidInputException("ST_AsMVT: feature id column must be of type INTEGER or BIGINT"); + } + } + } + // Add properties for (idx_t prop_vec_idx = 0; prop_vec_idx < property_formats.size(); prop_vec_idx++) { const auto &prop_format = property_formats[prop_vec_idx]; @@ -879,12 +1128,27 @@ struct ST_AsMVT { switch (prop_type.id()) { case LogicalTypeId::VARCHAR: { auto &prop_val = UnifiedVectorFormat::GetData(prop_format)[prop_row_idx]; - feature.AddProperty(static_cast(prop_vec_idx), prop_val); + feature.AddProperty(prop_vec_idx, prop_val, 
aggr.allocator); + } break; + case LogicalTypeId::FLOAT: { + auto &prop_val = UnifiedVectorFormat::GetData(prop_format)[prop_row_idx]; + feature.AddProperty(prop_vec_idx, prop_val); + } break; + case LogicalTypeId::DOUBLE: { + auto &prop_val = UnifiedVectorFormat::GetData(prop_format)[prop_row_idx]; + feature.AddProperty(prop_vec_idx, prop_val); + } break; + case LogicalTypeId::INTEGER: { + auto &prop_val = UnifiedVectorFormat::GetData(prop_format)[prop_row_idx]; + feature.AddProperty(prop_vec_idx, prop_val); } break; case LogicalTypeId::BIGINT: { auto &prop_val = UnifiedVectorFormat::GetData(prop_format)[prop_row_idx]; - feature.AddProperty(static_cast(prop_vec_idx), prop_val); - + feature.AddProperty(prop_vec_idx, prop_val); + } break; + case LogicalTypeId::BOOLEAN: { + auto &prop_val = UnifiedVectorFormat::GetData(prop_format)[prop_row_idx]; + feature.AddProperty(prop_vec_idx, prop_val); } break; default: throw InvalidInputException("ST_AsMVT: unsupported property type: %s", prop_type.ToString()); @@ -930,14 +1194,14 @@ struct ST_AsMVT { const auto state_ptr = UnifiedVectorFormat::GetData(state_format); vector buffer; - MVTValueDictionary tag_dict; + MVTValueSet tag_dict; for (idx_t raw_idx = 0; raw_idx < count; raw_idx++) { auto &state = *state_ptr[state_format.sel->get_index(raw_idx)]; const auto out_idx = raw_idx + offset; buffer.clear(); - tag_dict.clear(); + tag_dict.Clear(); state.layer.Finalize(bdata.extent, bdata.tag_names, bdata.layer_name, buffer, tag_dict); @@ -947,17 +1211,75 @@ struct ST_AsMVT { } } + //------------------------------------------------------------------------------------------------------------------ + // Docs + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Make a Mapbox Vector Tile from a set of geometries and properties + The function takes as input a row type (STRUCT) containing a geometry column and any number of 
property columns. + It returns a single binary BLOB containing the Mapbox Vector Tile. + + The function has the following signature: + + `ST_AsMVT(row STRUCT, layer_name VARCHAR DEFAULT 'layer', extent INTEGER DEFAULT 4096, geom_column_name VARCHAR DEFAULT NULL, feature_id_column_name VARCHAR DEFAULT NULL) -> BLOB` + + - The first argument is a struct containing the geometry and properties. + - The second argument is the name of the layer in the vector tile. This argument is optional and defaults to 'layer'. + - The third argument is the extent of the tile. This argument is optional and defaults to 4096. + - The fourth argument is the name of the geometry column in the input row. This argument is optional. If not provided, the first geometry column in the input row will be used. If multiple geometry columns are present, an error will be raised. + - The fifth argument is the name of the feature id column in the input row. This argument is optional. If provided, the values in this column will be used as feature ids in the vector tile. The column must be of type INTEGER or BIGINT. If set to negative or NULL, a feature id will not be assigned to the corresponding feature. + + The input struct must contain exactly one geometry column of type GEOMETRY. It can contain any number of property columns of types VARCHAR, FLOAT, DOUBLE, INTEGER, BIGINT, or BOOLEAN. + + Example: + ```sql + SELECT ST_AsMVT({'geom': geom, 'id': id, 'name': name}, 'cities', 4096, 'geom', 'id') AS tile + FROM cities; + ``` + + This example creates a vector tile named 'cities' with an extent of 4096 from the 'cities' table, using 'geom' as the geometry column and 'id' as the feature id column. + + However, you probably want to use the ST_AsMVTGeom function to first transform and clip your geometries to the tile extent. + The following example assumes the geometry is in WebMercator ("EPSG:3857") coordinates. 
+ Replace `{z}`, `{x}`, and `{y}` with the appropriate tile coordinates, `{your table}` with your table name, and `{tile_path}` with the path to write the tile to. + + ```sql + COPY ( + SELECT ST_AsMVT({{ + "geometry": ST_AsMVTGeom( + geometry, + ST_Extent(ST_TileEnvelope({z}, {x}, {y})), + 4096, + 256, + false + ) + }}) + FROM {your table} WHERE ST_Intersects(geometry, ST_TileEnvelope({z}, {x}, {y})) + ) to {tile_path} (FORMAT 'BLOB'); + ``` + )"; + //------------------------------------------------------------------------------------------------------------------ // Register //------------------------------------------------------------------------------------------------------------------ static void Register(ExtensionLoader &loader) { - AggregateFunction agg({LogicalTypeId::ANY}, LogicalType::BLOB, StateSize, Initialize, Update, Combine, Finalize, - nullptr, Bind); FunctionBuilder::RegisterAggregate(loader, "ST_AsMVT", [&](AggregateFunctionBuilder &func) { + // name, extent, layer_name, feature_id_name + const auto optional_args = {LogicalType::VARCHAR, LogicalType::INTEGER, LogicalType::VARCHAR, + LogicalType::VARCHAR}; + AggregateFunction agg({LogicalTypeId::ANY}, LogicalType::BLOB, StateSize, Initialize, Update, Combine, + Finalize, nullptr, Bind); + + // Push the variantsāˆ‚ func.SetFunction(agg); - func.SetDescription("Makes a vector tile from a set of geometries"); + for (auto &arg_type : optional_args) { + // Register all the variants with optional arguments + agg.arguments.push_back(arg_type); + func.SetFunction(agg); + } + func.SetDescription(DESCRIPTION); func.SetTag("ext", "spatial"); func.SetTag("category", "construction"); }); diff --git a/test/sql/mvt/st_asmvt.test b/test/sql/mvt/st_asmvt.test new file mode 100644 index 00000000..848058fc --- /dev/null +++ b/test/sql/mvt/st_asmvt.test @@ -0,0 +1,119 @@ +# name: test/sql/mvt/st_asmvt.test +# group: [mvt] + +require spatial + +# With default args +statement ok +COPY ( + SELECT st_asmvt( + {"geom": 
geom}, + 'my_layer' + ) as mvt + FROM ( + SELECT + row_number() over () as id, + st_point(x, y) as geom + FROM range(0, 100) as r(x), + range(0, 100) as rr(y) + ) +) TO '__TEST_DIR__/test_default_args.mvt' (FORMAT BLOB); + +query IIII +select unnest(layers, recursive := true) from st_read_meta('__TEST_DIR__/test_default_args.mvt'); +---- +my_layer 10000 [{'name': geom, 'type': Point, 'nullable': true, 'crs': NULL}] [{'name': mvt_id, 'type': Integer64, 'subtype': None, 'nullable': true, 'unique': false, 'width': 0, 'precision': 0}] + +# Simple test +statement ok +COPY ( + SELECT st_asmvt( + {"geom": geom, "fid": id}, + 'my_layer', + 4096, + 'geom', + 'fid' + ) as mvt + FROM ( + SELECT + row_number() over () as id, + st_point(x, y) as geom + FROM range(0, 100) as r(x), + range(0, 100) as rr(y) + ) +) TO '__TEST_DIR__/test.mvt' (FORMAT BLOB); + +query IIII +select unnest(layers, recursive := true) from st_read_meta('__TEST_DIR__/test.mvt'); +---- +my_layer 10000 [{'name': geom, 'type': Point, 'nullable': true, 'crs': NULL}] [{'name': mvt_id, 'type': Integer64, 'subtype': None, 'nullable': true, 'unique': false, 'width': 0, 'precision': 0}] + + +query I +select mvt_id from st_read('__TEST_DIR__/test.mvt') limit 3; +---- +1 +2 +3 + +# Advanced test +statement ok +COPY ( + SELECT st_asmvt( + { + "fid": -1, + "int_field": id::INTEGER, + "bigint_field": id::BIGINT, + "float_field": id::FLOAT, + "double_field": id::DOUBLE, + "string_field": id::VARCHAR, + "geom": geom, + }, + 'my_layer', + 4096, + 'geom', + 'fid' + ) as mvt + FROM ( + SELECT + row_number() over () as id, + ST_Buffer(st_point(x, y), 5) as geom + FROM range(0, 100) as r(x), + range(0, 100) as rr(y) + ) +) TO '__TEST_DIR__/test2.mvt' (FORMAT BLOB); + +query IIII +select unnest(layers, recursive := true) from st_read_meta('__TEST_DIR__/test2.mvt'); +---- +my_layer 10000 [{'name': geom, 'type': Polygon, 'nullable': true, 'crs': NULL}] [{'name': mvt_id, 'type': Integer64, 'subtype': None, 'nullable': true, 
'unique': false, 'width': 0, 'precision': 0}, {'name': int_field, 'type': Integer, 'subtype': None, 'nullable': true, 'unique': false, 'width': 0, 'precision': 0}, {'name': bigint_field, 'type': Integer, 'subtype': None, 'nullable': true, 'unique': false, 'width': 0, 'precision': 0}, {'name': float_field, 'type': Real, 'subtype': Float32, 'nullable': true, 'unique': false, 'width': 0, 'precision': 0}, {'name': double_field, 'type': Real, 'subtype': None, 'nullable': true, 'unique': false, 'width': 0, 'precision': 0}, {'name': string_field, 'type': String, 'subtype': None, 'nullable': true, 'unique': false, 'width': 0, 'precision': 0}] + +query IIIIII +select mvt_id, int_field, bigint_field, float_field, double_field, string_field from st_read('__TEST_DIR__/test2.mvt') LIMIT 3; +---- +NULL 1 1 1.0 1.0 1 +NULL 2 2 2.0 2.0 2 +NULL 3 3 3.0 3.0 3 + + +# Check unsupported types +statement error +COPY ( + SELECT st_asmvt( + {"geom": geom, "fid": id, "other_field": st_point(1,2)}, + 'my_layer', + 4096, + 'geom', + 'fid' + ) as mvt + FROM ( + SELECT + row_number() over () as id, + st_point(x, y) as geom + FROM range(0, 100) as r(x), + range(0, 100) as rr(y) + ) +) TO '__TEST_DIR__/test3.mvt' (FORMAT BLOB); +---- +Invalid Input Error: ST_AsMVT: property column "other_field" has unsupported type "GEOMETRY" +Only the following property types are supported: VARCHAR, FLOAT, DOUBLE, INTEGER, BIGINT, BOOLEAN From 1877d5a99fa80f086160dea50d12d74be11c1b07 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 10 Sep 2025 15:39:26 +0200 Subject: [PATCH 3/4] run format --- src/sgl/sgl.cpp | 5 ++--- src/spatial/modules/geos/geos_module.cpp | 5 +++-- src/spatial/modules/main/spatial_functions_cast.cpp | 1 - src/spatial/modules/main/spatial_functions_scalar.cpp | 4 ++-- src/spatial/operators/spatial_join_physical.cpp | 3 ++- src/spatial/util/math.cpp | 4 ++-- test/sql/geometry/st_asgeojson.test | 3 +++ test/sql/geometry/st_makepoint.test | 3 +++ 8 files changed, 17 insertions(+), 11 
deletions(-) diff --git a/src/sgl/sgl.cpp b/src/sgl/sgl.cpp index 27793af2..8a985f80 100644 --- a/src/sgl/sgl.cpp +++ b/src/sgl/sgl.cpp @@ -2976,7 +2976,7 @@ point_in_polygon_result prepared_geometry::contains(const vertex_xy &vert) const const auto end = math::min(node_end, levl_end); - if(stack[depth] != end) { + if (stack[depth] != end) { // Go sideways! stack[depth]++; break; @@ -3367,7 +3367,7 @@ static bool try_get_prepared_distance_lines(const prepared_geometry &lhs, const if (found_any) { distance = std::sqrt(min_dist); // Convert squared distance to actual distance - return true; // We found a distance + return true; // We found a distance } return false; // No distance found } @@ -3381,7 +3381,6 @@ bool prepared_geometry::try_get_distance(const prepared_geometry &other, double // WKT Parsing //====================================================================================================================== - namespace sgl { namespace { diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp index 843976b3..89effe6a 100644 --- a/src/spatial/modules/geos/geos_module.cpp +++ b/src/spatial/modules/geos/geos_module.cpp @@ -439,8 +439,9 @@ struct ST_AsMVTGeom { variant.SetBind(Bind); }); - func.SetDescription(R"(Returns a geometry transformed and clipped to fit within a tile boundary. 
- The geometry should be in the same SRS as the tile coordinates.)"); + func.SetDescription(R"(Transform and clip geometry to a tile boundary + + - See "ST_AsMVT" for more details)"); func.SetTag("ext", "spatial"); func.SetTag("category", "construction"); }); diff --git a/src/spatial/modules/main/spatial_functions_cast.cpp b/src/spatial/modules/main/spatial_functions_cast.cpp index 3133f582..ef960c0d 100644 --- a/src/spatial/modules/main/spatial_functions_cast.cpp +++ b/src/spatial/modules/main/spatial_functions_cast.cpp @@ -383,7 +383,6 @@ struct PointCasts { return true; } - //------------------------------------------------------------------------------------------------------------------ // Register //------------------------------------------------------------------------------------------------------------------ diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index 020059fd..60ca7373 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -4527,7 +4527,8 @@ struct ST_GeomFromGeoJSON { const auto root = yyjson_doc_get_root(doc); if (!yyjson_is_obj(root)) { - throw InvalidInputException("Could not parse GeoJSON input: Not a valid JSON object, (%s)", input.GetString()); + throw InvalidInputException("Could not parse GeoJSON input: Not a valid JSON object, (%s)", + input.GetString()); } bool has_z = false; @@ -7817,7 +7818,6 @@ struct ST_Point { func.SetTag("category", "construction"); }); - FunctionBuilder::RegisterScalar(loader, "ST_MakePoint", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("x", LogicalType::DOUBLE); diff --git a/src/spatial/operators/spatial_join_physical.cpp b/src/spatial/operators/spatial_join_physical.cpp index e8eb86d5..50e0eb67 100644 --- a/src/spatial/operators/spatial_join_physical.cpp +++ 
b/src/spatial/operators/spatial_join_physical.cpp @@ -1024,7 +1024,8 @@ class SpatialJoinGlobalSourceState final : public GlobalSourceState { column_ids.push_back(op.build_side_key_types.size() + op.build_side_payload_types.size()); // We dont need to keep the tuples aroun after scanning - state.collection->InitializeScan(scan_state, std::move(column_ids), TupleDataPinProperties::KEEP_EVERYTHING_PINNED); + state.collection->InitializeScan(scan_state, std::move(column_ids), + TupleDataPinProperties::KEEP_EVERYTHING_PINNED); tuples_maximum = state.collection->Count(); } diff --git a/src/spatial/util/math.cpp b/src/spatial/util/math.cpp index 2b93e414..d56355e5 100644 --- a/src/spatial/util/math.cpp +++ b/src/spatial/util/math.cpp @@ -7,7 +7,7 @@ namespace duckdb { // We've got this exposed upstream, we just need to wait for the next release extern "C" int geos_d2sfixed_buffered_n(double f, uint32_t precision, char *result); -template +template static void FormatDouble(T &buffer, double d, int32_t precision) { D_ASSERT(precision >= 0 && precision <= 15); char buf[512]; @@ -96,4 +96,4 @@ string MathUtil::format_coord(double x, double y, double z, double m) { #endif -} // namespace duckdb \ No newline at end of file +} // namespace duckdb diff --git a/test/sql/geometry/st_asgeojson.test b/test/sql/geometry/st_asgeojson.test index 8abc1ef5..3f486ab0 100644 --- a/test/sql/geometry/st_asgeojson.test +++ b/test/sql/geometry/st_asgeojson.test @@ -1,3 +1,6 @@ +# name: test/sql/geometry/st_asgeojson.test +# group: [geometry] + require spatial # Geometry diff --git a/test/sql/geometry/st_makepoint.test b/test/sql/geometry/st_makepoint.test index c14075ab..c6034fcf 100644 --- a/test/sql/geometry/st_makepoint.test +++ b/test/sql/geometry/st_makepoint.test @@ -1,3 +1,6 @@ +# name: test/sql/geometry/st_makepoint.test +# group: [geometry] + require spatial query I From ff2b1e9ee63c116b77e158c6a8f19f96e2239a84 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 10 Sep 2025 
15:46:24 +0200 Subject: [PATCH 4/4] update function docs --- docs/functions.md | 112 +++++++++++++++++++++++ src/spatial/modules/geos/geos_module.cpp | 6 +- 2 files changed, 116 insertions(+), 2 deletions(-) diff --git a/docs/functions.md b/docs/functions.md index a648803c..89583ca4 100644 --- a/docs/functions.md +++ b/docs/functions.md @@ -12,6 +12,7 @@ | [`ST_Area_Spheroid`](#st_area_spheroid) | Returns the area of a geometry in meters, using an ellipsoidal model of the earth | | [`ST_AsGeoJSON`](#st_asgeojson) | Returns the geometry as a GeoJSON fragment | | [`ST_AsHEXWKB`](#st_ashexwkb) | Returns the geometry as a HEXWKB string | +| [`ST_AsMVTGeom`](#st_asmvtgeom) | Transform and clip geometry to a tile boundary | | [`ST_AsSVG`](#st_assvg) | Convert the geometry into a SVG fragment or path | | [`ST_AsText`](#st_astext) | Returns the geometry as a WKT string | | [`ST_AsWKB`](#st_aswkb) | Returns the geometry as a WKB (Well-Known-Binary) blob | @@ -88,6 +89,7 @@ | [`ST_MakeBox2D`](#st_makebox2d) | Create a BOX2D from two POINT geometries | | [`ST_MakeEnvelope`](#st_makeenvelope) | Create a rectangular polygon from min/max coordinates | | [`ST_MakeLine`](#st_makeline) | Create a LINESTRING from a list of POINT geometries | +| [`ST_MakePoint`](#st_makepoint) | Creates a GEOMETRY point from a pair of floating point numbers. | | [`ST_MakePolygon`](#st_makepolygon) | Create a POLYGON from a LINESTRING shell | | [`ST_MakeValid`](#st_makevalid) | Returns a valid representation of the geometry | | [`ST_MaximumInscribedCircle`](#st_maximuminscribedcircle) | Returns the maximum inscribed circle of the input geometry, optionally with a tolerance. 
| @@ -144,6 +146,7 @@ | Function | Summary | | --- | --- | +| [`ST_AsMVT`](#st_asmvt) | Make a Mapbox Vector Tile from a set of geometries and properties | | [`ST_CoverageInvalidEdges_Agg`](#st_coverageinvalidedges_agg) | Returns the invalid edges of a coverage geometry | | [`ST_CoverageSimplify_Agg`](#st_coveragesimplify_agg) | Simplifies a set of geometries while maintaining coverage | | [`ST_CoverageUnion_Agg`](#st_coverageunion_agg) | Unions a set of geometries while maintaining coverage | @@ -402,6 +405,26 @@ SELECT ST_AsHexWKB('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry); ---- +### ST_AsMVTGeom + + +#### Signatures + +```sql +GEOMETRY ST_AsMVTGeom (geom GEOMETRY, bounds BOX_2D, extent BIGINT, buffer BIGINT, clip_geom BOOLEAN) +GEOMETRY ST_AsMVTGeom (geom GEOMETRY, bounds BOX_2D, extent BIGINT, buffer BIGINT) +GEOMETRY ST_AsMVTGeom (geom GEOMETRY, bounds BOX_2D, extent BIGINT) +GEOMETRY ST_AsMVTGeom (geom GEOMETRY, bounds BOX_2D) +``` + +#### Description + +Transform and clip geometry to a tile boundary + +See "ST_AsMVT" for more details + +---- + ### ST_AsSVG @@ -1937,6 +1960,35 @@ LINESTRING(0 0, 1 1) ---- +### ST_MakePoint + + +#### Signatures + +```sql +POINT_2D ST_MakePoint (x DOUBLE, y DOUBLE) +POINT_3D ST_MakePoint (x DOUBLE, y DOUBLE, z DOUBLE) +POINT_4D ST_MakePoint (x DOUBLE, y DOUBLE, z DOUBLE, m DOUBLE) +``` + +#### Description + +Creates a GEOMETRY point from a pair of floating point numbers. + +For geodetic coordinate systems, x is typically the longitude value and y is the latitude value. + +Note that ST_Point is equivalent. ST_MakePoint is provided for PostGIS compatibility. 
+ +#### Example + +```sql +SELECT ST_AsText(ST_MakePoint(143.3, -24.2)); +---- +POINT (143.3 -24.2) +``` + +---- + ### ST_MakePolygon @@ -3015,6 +3067,66 @@ SELECT ST_ZMin(ST_Point(1, 2, 3)) ## Aggregate Functions +### ST_AsMVT + + +#### Signatures + +```sql +BLOB ST_AsMVT (col0 ANY) +BLOB ST_AsMVT (col0 ANY, col1 VARCHAR) +BLOB ST_AsMVT (col0 ANY, col1 VARCHAR, col2 INTEGER) +BLOB ST_AsMVT (col0 ANY, col1 VARCHAR, col2 INTEGER, col3 VARCHAR) +BLOB ST_AsMVT (col0 ANY, col1 VARCHAR, col2 INTEGER, col3 VARCHAR, col4 VARCHAR) +``` + +#### Description + +Make a Mapbox Vector Tile from a set of geometries and properties. +The function takes as input a row type (STRUCT) containing a geometry column and any number of property columns. +It returns a single binary BLOB containing the Mapbox Vector Tile. + +The function has the following signature: + +`ST_AsMVT(row STRUCT, layer_name VARCHAR DEFAULT 'layer', extent INTEGER DEFAULT 4096, geom_column_name VARCHAR DEFAULT NULL, feature_id_column_name VARCHAR DEFAULT NULL) -> BLOB` + +- The first argument is a struct containing the geometry and properties. +- The second argument is the name of the layer in the vector tile. This argument is optional and defaults to 'layer'. +- The third argument is the extent of the tile. This argument is optional and defaults to 4096. +- The fourth argument is the name of the geometry column in the input row. This argument is optional. If not provided, the first geometry column in the input row will be used. If multiple geometry columns are present, an error will be raised. +- The fifth argument is the name of the feature id column in the input row. This argument is optional. If provided, the values in this column will be used as feature ids in the vector tile. The column must be of type INTEGER or BIGINT. If a value in this column is negative or NULL, a feature id will not be assigned to the corresponding feature. + +The input struct must contain exactly one geometry column of type GEOMETRY. 
It can contain any number of property columns of types VARCHAR, FLOAT, DOUBLE, INTEGER, BIGINT, or BOOLEAN. + +Example: +```sql +SELECT ST_AsMVT({'geom': geom, 'id': id, 'name': name}, 'cities', 4096, 'geom', 'id') AS tile +FROM cities; + ``` + +This example creates a vector tile containing a layer named 'cities' with an extent of 4096 from the 'cities' table, using 'geom' as the geometry column and 'id' as the feature id column. + +However, you probably want to use the ST_AsMVTGeom function to first transform and clip your geometries to the tile extent. +The following example assumes the geometry is in WebMercator ("EPSG:3857") coordinates. +Replace `{z}`, `{x}`, and `{y}` with the appropriate tile coordinates, `{your table}` with your table name, and `{tile_path}` with the path to write the tile to. + +```sql +COPY ( + SELECT ST_AsMVT({ + "geometry": ST_AsMVTGeom( + geometry, + ST_Extent(ST_TileEnvelope({z}, {x}, {y})), + 4096, + 256, + false + ) + }) + FROM {your table} WHERE ST_Intersects(geometry, ST_TileEnvelope({z}, {x}, {y})) +) to {tile_path} (FORMAT 'BLOB'); +``` + +---- + ### ST_CoverageInvalidEdges_Agg diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp index 89effe6a..87b50c67 100644 --- a/src/spatial/modules/geos/geos_module.cpp +++ b/src/spatial/modules/geos/geos_module.cpp @@ -439,9 +439,11 @@ struct ST_AsMVTGeom { variant.SetBind(Bind); }); - func.SetDescription(R"(Transform and clip geometry to a tile boundary + func.SetDescription(R"( + Transform and clip geometry to a tile boundary + + See "ST_AsMVT" for more details)"); - - See "ST_AsMVT" for more details)"); func.SetTag("ext", "spatial"); func.SetTag("category", "construction"); });