99from sqlalchemy .engine import URL
1010from sqlalchemy .ext .asyncio import AsyncEngine , create_async_engine
1111
12+ from .hybrid_search_config import HybridSearchConfig
13+
1214T = TypeVar ("T" )
1315
1416
@@ -156,6 +158,7 @@ async def _ainit_vectorstore_table(
156158 id_column : Union [str , Column , ColumnDict ] = "langchain_id" ,
157159 overwrite_existing : bool = False ,
158160 store_metadata : bool = True ,
161+ hybrid_search_config : Optional [HybridSearchConfig ] = None ,
159162 ) -> None :
160163 """
161164 Create a table for saving of vectors to be used with PGVectorStore.
@@ -178,6 +181,8 @@ async def _ainit_vectorstore_table(
178181 overwrite_existing (bool): Whether to drop existing table. Default: False.
179182 store_metadata (bool): Whether to store metadata in the table.
180183 Default: True.
184+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration.
185+ Default: None.
181186
182187 Raises:
183188 :class:`DuplicateTableError <asyncpg.exceptions.DuplicateTableError>`: if table already exists.
@@ -186,6 +191,7 @@ async def _ainit_vectorstore_table(
186191
187192 schema_name = self ._escape_postgres_identifier (schema_name )
188193 table_name = self ._escape_postgres_identifier (table_name )
194+ hybrid_search_default_column_name = content_column + "_tsv"
189195 content_column = self ._escape_postgres_identifier (content_column )
190196 embedding_column = self ._escape_postgres_identifier (embedding_column )
191197 if metadata_columns is None :
@@ -226,10 +232,22 @@ async def _ainit_vectorstore_table(
226232 id_data_type = id_column ["data_type" ]
227233 id_column_name = id_column ["name" ]
228234
235+ hybrid_search_column = "" # Default is no TSV column for hybrid search
236+ if hybrid_search_config :
237+ hybrid_search_column_name = (
238+ hybrid_search_config .tsv_column or hybrid_search_default_column_name
239+ )
240+ hybrid_search_column_name = self ._escape_postgres_identifier (
241+ hybrid_search_column_name
242+ )
243+ hybrid_search_config .tsv_column = hybrid_search_column_name
244+ hybrid_search_column = f',"{ self ._escape_postgres_identifier (hybrid_search_column_name )} " TSVECTOR NOT NULL'
245+
229246 query = f"""CREATE TABLE "{ schema_name } "."{ table_name } "(
230247 "{ id_column_name } " { id_data_type } PRIMARY KEY,
231248 "{ content_column } " TEXT NOT NULL,
232- "{ embedding_column } " vector({ vector_size } ) NOT NULL"""
249+ "{ embedding_column } " vector({ vector_size } ) NOT NULL
250+ { hybrid_search_column } """
233251 for column in metadata_columns :
234252 if isinstance (column , Column ):
235253 nullable = "NOT NULL" if not column .nullable else ""
@@ -258,6 +276,7 @@ async def ainit_vectorstore_table(
258276 id_column : Union [str , Column , ColumnDict ] = "langchain_id" ,
259277 overwrite_existing : bool = False ,
260278 store_metadata : bool = True ,
279+ hybrid_search_config : Optional [HybridSearchConfig ] = None ,
261280 ) -> None :
262281 """
263282 Create a table for saving of vectors to be used with PGVectorStore.
@@ -280,6 +299,10 @@ async def ainit_vectorstore_table(
280299 overwrite_existing (bool): Whether to drop existing table. Default: False.
281300 store_metadata (bool): Whether to store metadata in the table.
282301 Default: True.
302+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration.
303+ Note that queries might be slow if the hybrid search column does not exist.
304+ For best hybrid search performance, consider creating a TSV column and adding a GIN index.
305+ Default: None.
283306 """
284307 await self ._run_as_async (
285308 self ._ainit_vectorstore_table (
@@ -293,6 +316,7 @@ async def ainit_vectorstore_table(
293316 id_column = id_column ,
294317 overwrite_existing = overwrite_existing ,
295318 store_metadata = store_metadata ,
319+ hybrid_search_config = hybrid_search_config ,
296320 )
297321 )
298322
@@ -309,6 +333,7 @@ def init_vectorstore_table(
309333 id_column : Union [str , Column , ColumnDict ] = "langchain_id" ,
310334 overwrite_existing : bool = False ,
311335 store_metadata : bool = True ,
336+ hybrid_search_config : Optional [HybridSearchConfig ] = None ,
312337 ) -> None :
313338 """
314339 Create a table for saving of vectors to be used with PGVectorStore.
@@ -331,6 +356,10 @@ def init_vectorstore_table(
331356 overwrite_existing (bool): Whether to drop existing table. Default: False.
332357 store_metadata (bool): Whether to store metadata in the table.
333358 Default: True.
359+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration.
360+ Note that queries might be slow if the hybrid search column does not exist.
361+ For best hybrid search performance, consider creating a TSV column and adding a GIN index.
362+ Default: None.
334363 """
335364 self ._run_as_sync (
336365 self ._ainit_vectorstore_table (
@@ -344,6 +373,7 @@ def init_vectorstore_table(
344373 id_column = id_column ,
345374 overwrite_existing = overwrite_existing ,
346375 store_metadata = store_metadata ,
376+ hybrid_search_config = hybrid_search_config ,
347377 )
348378 )
349379
@@ -354,7 +384,7 @@ async def _adrop_table(
354384 schema_name : str = "public" ,
355385 ) -> None :
356386 """Drop the vector store table"""
357- query = f'DROP TABLE "{ schema_name } "."{ table_name } ";'
387+ query = f'DROP TABLE IF EXISTS "{ schema_name } "."{ table_name } ";'
358388 async with self ._pool .connect () as conn :
359389 await conn .execute (text (query ))
360390 await conn .commit ()
0 commit comments