Skip to content

Issue 4130 similaritySearch throwing Exception due to schema name fix #4166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,8 @@ static boolean isValidNameForDatabaseObject(String name) {
return false;
}

// Check if the table or schema has Only alphanumeric characters and underscores
// and should be less than 64 characters
if (!name.matches("^[a-zA-Z0-9_]{1,64}$")) {
// Check basic constraints: non-empty and reasonable length
if (name.trim().isEmpty() || name.length() > 64) {
return false;
}

Expand All @@ -60,6 +59,21 @@ static boolean isValidNameForDatabaseObject(String name) {
return false;
}

// Reject names with potentially dangerous characters that could indicate SQL
// injection
// These would be problematic even when quoted
if (name.contains(";") || // Statement separator
name.contains("--") || // SQL comment
name.contains("/*") || // Block comment start
name.contains("*/") || // Block comment end
name.contains("\0") || // Null byte
name.contains("\n") || // Newline
name.contains("\r") || // Carriage return
name.toLowerCase().matches(".*\\b(drop|alter|create|insert|update|delete|select)\\b.*")) { // SQL
// keywords
return false;
}

return true;

}
Expand All @@ -80,11 +94,11 @@ void validateTableSchema(String schemaName, String tableName) {

if (!isValidNameForDatabaseObject(schemaName)) {
throw new IllegalArgumentException(
"Schema name should only contain alphanumeric characters and underscores");
"Schema name must be non-empty, not exceed 64 characters, and not contain dangerous characters");
}
if (!isValidNameForDatabaseObject(tableName)) {
throw new IllegalArgumentException(
"Table name should only contain alphanumeric characters and underscores");
"Table name must be non-empty, not exceed 64 characters, and not contain dangerous characters");
}

if (!isTableExists(schemaName, tableName)) {
Expand Down Expand Up @@ -138,13 +152,14 @@ void validateTableSchema(String schemaName, String tableName) {
+ " CREATE EXTENSION IF NOT EXISTS vector;\n" + " CREATE EXTENSION IF NOT EXISTS hstore;\n"
+ " CREATE EXTENSION IF NOT EXISTS \"uuid-ossp\";\n"
+ "2. Verify that the table exists with the appropriate structure. If it does not exist, create it using a SQL command similar to the following, replacing 'embedding_dimensions' with the appropriate size based on your vector embeddings:\n"
+ String.format(" CREATE TABLE IF NOT EXISTS %s (\n"
+ String.format(" CREATE TABLE IF NOT EXISTS \"%s\".\"%s\" (\n"
+ " id uuid DEFAULT uuid_generate_v4() PRIMARY KEY,\n" + " content text,\n"
+ " metadata json,\n"
+ " embedding vector(embedding_dimensions) // Replace 'embedding_dimensions' with your specific value\n"
+ " );\n", schemaName + "." + tableName)
+ " );\n", schemaName, tableName)
+ "3. Create an appropriate index for the vector embedding to optimize performance. Adjust the index type and options based on your usage. Example SQL for creating an index:\n"
+ String.format(" CREATE INDEX ON %s USING HNSW (embedding vector_cosine_ops);\n", tableName)
+ String.format(" CREATE INDEX ON \"%s\".\"%s\" USING HNSW (embedding vector_cosine_ops);\n",
schemaName, tableName)
+ "\nPlease adjust these commands based on your specific configuration and the capabilities of your vector database system.");
throw new IllegalStateException(e);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
* </p>
* <pre>{@code
* PgVectorStore vectorStore = PgVectorStore.builder(jdbcTemplate, embeddingModel)
* .schemaName("custom_schema")
* .schemaName("custom-schema") // Special characters like hyphens are supported
* .vectorTableName("custom_vectors")
* .distanceType(PgDistanceType.NEGATIVE_INNER_PRODUCT)
* .removeExistingVectorStoreTable(true)
Expand Down Expand Up @@ -429,7 +429,7 @@ public void afterPropertiesSet() {
this.jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS \"uuid-ossp\"");
}

this.jdbcTemplate.execute(String.format("CREATE SCHEMA IF NOT EXISTS %s", this.getSchemaName()));
this.jdbcTemplate.execute(String.format("CREATE SCHEMA IF NOT EXISTS \"%s\"", this.getSchemaName()));

// Remove existing VectorStoreTable
if (this.removeExistingVectorStoreTable) {
Expand All @@ -453,8 +453,15 @@ embedding vector(%d)
}
}

/**
 * Returns the fully qualified table name with proper PostgreSQL identifier quoting.
 * The schema name and the table name are each wrapped in double quotes so that names
 * containing special characters (such as hyphens, spaces, or reserved keywords) do
 * not cause SQL syntax errors. A double quote embedded in either name is doubled,
 * which is how PostgreSQL spells a literal quote inside a quoted identifier, so a
 * name can never terminate its own quoting early.
 * @return the fully qualified table name in the format "schema"."table"
 */
private String getFullyQualifiedTableName() {
	// String.valueOf keeps the plain-concatenation behaviour ("null") for an unset name.
	String quotedSchema = "\"" + String.valueOf(this.schemaName).replace("\"", "\"\"") + "\"";
	String quotedTable = "\"" + String.valueOf(this.vectorTableName).replace("\"", "\"\"") + "\"";
	return quotedSchema + "." + quotedTable;
}

private PgIdType getIdType() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Copyright 2023-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.springframework.ai.vectorstore.pgvector;

import java.lang.reflect.Method;
import java.util.List;
import java.util.Map;

import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;

import org.springframework.ai.document.Document;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowMapper;

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
* Unit test to demonstrate the schema quoting fix for GitHub issue #4130. Verifies that
* hyphenated schema names are properly quoted in SQL generation.
*
* @author Claude Code Assistant
*/
public class PgVectorStoreSchemaQuotingTest {

	/** Hyphenated schema name reported in GitHub issue #4130. */
	private static final String HYPHENATED_SCHEMA = "demo-1998";

	/** Table name used by every test in this class. */
	private static final String TABLE = "vector_store";

	/** The schema-qualified identifier the store is expected to emit. */
	private static final String QUOTED_QUALIFIED_NAME = "\"demo-1998\".\"vector_store\"";

	/**
	 * The private name-building method must wrap both the hyphenated schema and the
	 * table name in double quotes.
	 */
	@Test
	public void shouldProperlyQuoteHyphenatedSchemaNames() throws Exception {
		PgVectorStore store = storeWithHyphenatedSchema(mock(JdbcTemplate.class), embeddingModelWithDimensions());

		// The method under test is private, so it is reached through reflection.
		Method nameAccessor = PgVectorStore.class.getDeclaredMethod("getFullyQualifiedTableName");
		nameAccessor.setAccessible(true);
		Object qualifiedName = nameAccessor.invoke(store);

		assertThat((String) qualifiedName).isEqualTo(QUOTED_QUALIFIED_NAME);
	}

	/**
	 * The SQL handed to the {@link JdbcTemplate} during a similarity search must contain
	 * the quoted schema-qualified table name.
	 */
	@Test
	public void shouldGenerateQuotedSQLInSimilaritySearch() throws Exception {
		JdbcTemplate template = mock(JdbcTemplate.class);
		EmbeddingModel model = embeddingModelWithDimensions();
		when(model.embed(anyString())).thenReturn(new float[] { 1.0f, 2.0f, 3.0f });

		// Stub the query so the search returns a single canned document.
		Document cannedHit = Document.builder().id("1").text("test").metadata(Map.of("distance", 0.5)).build();
		when(template.query(anyString(), any(RowMapper.class), any(), any(), any(), any()))
			.thenReturn(List.of(cannedHit));

		PgVectorStore store = storeWithHyphenatedSchema(template, model);

		SearchRequest request = SearchRequest.builder().query("test").topK(5).build();
		store.doSimilaritySearch(request);

		// Capture the SQL string that was actually passed to the template.
		ArgumentCaptor<String> capturedSql = ArgumentCaptor.forClass(String.class);
		verify(template).query(capturedSql.capture(), any(RowMapper.class), any(), any(), any(), any());

		assertThat(capturedSql.getValue()).contains(QUOTED_QUALIFIED_NAME);
	}

	/**
	 * Creates a mocked embedding model that reports 1536 dimensions.
	 * @return the stubbed mock
	 */
	private static EmbeddingModel embeddingModelWithDimensions() {
		EmbeddingModel model = mock(EmbeddingModel.class);
		when(model.dimensions()).thenReturn(1536);
		return model;
	}

	/**
	 * Builds a store that targets the hyphenated schema, with schema initialization
	 * switched off.
	 * @param template the (mocked) JDBC template the store should use
	 * @param model the (mocked) embedding model the store should use
	 * @return the configured store
	 */
	private static PgVectorStore storeWithHyphenatedSchema(JdbcTemplate template, EmbeddingModel model) {
		return PgVectorStore.builder(template, model)
			.schemaName(HYPHENATED_SCHEMA)
			.vectorTableName(TABLE)
			.initializeSchema(false)
			.build();
	}

}
Loading