From ee46677f92c96c365fd3b7ab358ae490d4644c4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gamboa=20Labb=C3=A9?= Date: Wed, 18 Jun 2025 17:28:39 -0400 Subject: [PATCH 1/3] change vector index logic --- .../labs/mcp/servers/unity_catalog/tools/vector_search.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py b/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py index c3c7ba7..d5033a7 100644 --- a/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py +++ b/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py @@ -8,10 +8,12 @@ # Constant storing vector index content vector column name CONTENT_VECTOR_COLUMN_NAME = "__db_content_vector" +CONTENT_VECTOR_COLUMN_STARTS_WITH= "__db_" class QueryInput(BaseModel): query: str + num_results: int class VectorSearchTool(BaseTool): @@ -45,7 +47,7 @@ def execute(self, **kwargs): results = index.similarity_search( query_text=model.query, columns=self.columns, - num_results=self.num_results, + num_results=model.num_results, ) docs = results.get("result", {}).get("data_array", []) @@ -58,7 +60,7 @@ def get_table_columns( ) -> list[str]: table_info = workspace_client.tables.get(full_table_name) return [ - col.name for col in table_info.columns if col.name != CONTENT_VECTOR_COLUMN_NAME + col.name for col in table_info.columns if (not col.name.startswith(CONTENT_VECTOR_COLUMN_STARTS_WITH) ) # col.name != CONTENT_VECTOR_COLUMN_NAME ] From fcc87c4d05b388af44971915b146f19194dc9a87 Mon Sep 17 00:00:00 2001 From: Bastianciito Date: Thu, 19 Jun 2025 10:38:05 -0400 Subject: [PATCH 2/3] starts with and ends with --- .../mcp/servers/unity_catalog/tools/vector_search.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py b/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py index d5033a7..616e96e 100644 
--- a/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py +++ b/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py @@ -6,15 +6,13 @@ from databricks.labs.mcp.servers.unity_catalog.cli import CliSettings from mcp.types import TextContent, Tool as ToolSpec -# Constant storing vector index content vector column name -CONTENT_VECTOR_COLUMN_NAME = "__db_content_vector" +# Constant to filter vector column and expose by default the otherS columns presented in the index CONTENT_VECTOR_COLUMN_STARTS_WITH= "__db_" +CONTENT_VECTOR_COLUMN_ENDS_WITH= "_vector" class QueryInput(BaseModel): query: str - num_results: int - class VectorSearchTool(BaseTool): def __init__( @@ -47,7 +45,7 @@ def execute(self, **kwargs): results = index.similarity_search( query_text=model.query, columns=self.columns, - num_results=model.num_results, + num_results=self.num_results, ) docs = results.get("result", {}).get("data_array", []) @@ -59,8 +57,9 @@ def get_table_columns( workspace_client: WorkspaceClient, full_table_name: str ) -> list[str]: table_info = workspace_client.tables.get(full_table_name) + return [ - col.name for col in table_info.columns if (not col.name.startswith(CONTENT_VECTOR_COLUMN_STARTS_WITH) ) # col.name != CONTENT_VECTOR_COLUMN_NAME + col.name for col in table_info.columns if not (col.name.startswith(CONTENT_VECTOR_COLUMN_STARTS_WITH) and col.name.endswith(CONTENT_VECTOR_COLUMN_ENDS_WITH) ) ] From dc7b524957b98310db7eaac644b9bcfbea73319b Mon Sep 17 00:00:00 2001 From: bastiancito Date: Fri, 20 Jun 2025 23:16:03 -0400 Subject: [PATCH 3/3] comment added + fmt + lint --- .../servers/unity_catalog/tools/vector_search.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py b/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py index 616e96e..9807172 100644 --- a/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py +++ 
b/src/databricks/labs/mcp/servers/unity_catalog/tools/vector_search.py @@ -6,14 +6,16 @@ from databricks.labs.mcp.servers.unity_catalog.cli import CliSettings from mcp.types import TextContent, Tool as ToolSpec -# Constant to filter vector column and expose by default the otherS columns presented in the index -CONTENT_VECTOR_COLUMN_STARTS_WITH= "__db_" -CONTENT_VECTOR_COLUMN_ENDS_WITH= "_vector" +# Constants used to identify vector columns by name. +# Columns matching both patterns will be excluded; all others will be returned. +CONTENT_VECTOR_COLUMN_STARTS_WITH = "__db_" +CONTENT_VECTOR_COLUMN_ENDS_WITH = "_vector" class QueryInput(BaseModel): query: str + class VectorSearchTool(BaseTool): def __init__( self, @@ -59,7 +61,12 @@ def get_table_columns( table_info = workspace_client.tables.get(full_table_name) return [ - col.name for col in table_info.columns if not (col.name.startswith(CONTENT_VECTOR_COLUMN_STARTS_WITH) and col.name.endswith(CONTENT_VECTOR_COLUMN_ENDS_WITH) ) + col.name + for col in table_info.columns + if not ( + col.name.startswith(CONTENT_VECTOR_COLUMN_STARTS_WITH) + and col.name.endswith(CONTENT_VECTOR_COLUMN_ENDS_WITH) + ) ]