1111 Callable ,
1212 Dict ,
1313 Generator ,
14+ Iterable ,
1415 List ,
1516 Optional ,
1617 Sequence ,
2627import sqlalchemy
2728from langchain_core .documents import Document
2829from langchain_core .embeddings import Embeddings
29- from langchain_core .indexing import UpsertResponse
3030from langchain_core .utils import get_from_dict_or_env
3131from langchain_core .vectorstores import VectorStore
3232from sqlalchemy import SQLColumnExpression , cast , create_engine , delete , func , select
@@ -764,7 +764,9 @@ def add_embeddings(
764764 """
765765 assert not self ._async_engine , "This method must be called with sync_mode"
766766 if ids is None :
767- ids = [str (uuid .uuid4 ()) for _ in texts ]
767+ ids_ = [str (uuid .uuid4 ()) for _ in texts ]
768+ else :
769+ ids_ = [id if id is not None else str (uuid .uuid4 ()) for id in ids ]
768770
769771 if not metadatas :
770772 metadatas = [{} for _ in texts ]
@@ -782,7 +784,7 @@ def add_embeddings(
782784 "cmetadata" : metadata or {},
783785 }
784786 for text , metadata , embedding , id in zip (
785- texts , metadatas , embeddings , ids
787+ texts , metadatas , embeddings , ids_
786788 )
787789 ]
788790 stmt = insert (self .EmbeddingStore ).values (data )
@@ -798,7 +800,7 @@ def add_embeddings(
798800 session .execute (on_conflict_stmt )
799801 session .commit ()
800802
801- return ids
803+ return ids_
802804
803805 async def aadd_embeddings (
804806 self ,
@@ -819,8 +821,11 @@ async def aadd_embeddings(
819821 kwargs: vectorstore specific parameters
820822 """
821823 await self .__apost_init__ () # Lazy async init
824+
822825 if ids is None :
823- ids = [str (uuid .uuid1 ()) for _ in texts ]
826+ ids_ = [str (uuid .uuid4 ()) for _ in texts ]
827+ else :
828+ ids_ = [id if id is not None else str (uuid .uuid4 ()) for id in ids ]
824829
825830 if not metadatas :
826831 metadatas = [{} for _ in texts ]
@@ -838,7 +843,7 @@ async def aadd_embeddings(
838843 "cmetadata" : metadata or {},
839844 }
840845 for text , metadata , embedding , id in zip (
841- texts , metadatas , embeddings , ids
846+ texts , metadatas , embeddings , ids_
842847 )
843848 ]
844849 stmt = insert (self .EmbeddingStore ).values (data )
@@ -854,7 +859,67 @@ async def aadd_embeddings(
854859 await session .execute (on_conflict_stmt )
855860 await session .commit ()
856861
857- return ids
862+ return ids_
863+
def add_texts(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Embed the given texts and add them to the vectorstore (sync mode).

    The texts are materialized into a list, embedded with
    ``self.embedding_function.embed_documents`` and then handed to
    ``self.add_embeddings`` together with the metadatas and ids.

    Args:
        texts: Iterable of strings to add to the vectorstore.
        metadatas: Optional list of metadatas associated with the texts.
            An empty or omitted value is forwarded as ``None``.
        ids: Optional list of ids for the texts. An empty or omitted value
            is forwarded as ``None``.
        kwargs: vectorstore specific parameters, forwarded unchanged to
            ``add_embeddings``.

    Returns:
        Whatever ``add_embeddings`` returns: the list of ids of the added
        texts.

    Raises:
        AssertionError: If the store was configured with an async engine.
    """
    # Raise explicitly instead of using ``assert`` so that the guard is not
    # stripped when Python runs with ``-O``; the exception type and message
    # are the same as before.
    if self._async_engine:
        raise AssertionError("This method must be called without async_mode")

    # ``texts`` may be a one-shot iterable; materialize it once so the same
    # items are used for both embedding and storage.
    texts_ = list(texts)
    embeddings = self.embedding_function.embed_documents(texts_)
    return self.add_embeddings(
        texts=texts_,
        embeddings=list(embeddings),
        metadatas=list(metadatas) if metadatas else None,
        ids=list(ids) if ids else None,
        **kwargs,
    )
893+
async def aadd_texts(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Asynchronously embed the given texts and add them to the vectorstore.

    Args:
        texts: Iterable of strings to add to the vectorstore.
        metadatas: Optional list of metadatas associated with the texts.
            An empty or omitted value is forwarded as ``None``.
        ids: Optional list of ids for the texts. An empty or omitted value
            is forwarded as ``None``.
        kwargs: vectorstore specific parameters, forwarded unchanged to
            ``aadd_embeddings``.

    Returns:
        Whatever ``aadd_embeddings`` returns: the list of ids of the added
        texts.
    """
    await self.__apost_init__()  # Lazy async init

    # Materialize once: ``texts`` may be a one-shot iterable and is needed
    # both for embedding and for storage.
    documents = list(texts)
    vectors = list(await self.embedding_function.aembed_documents(documents))
    metadata_list = list(metadatas) if metadatas else None
    id_list = list(ids) if ids else None

    stored_ids = await self.aadd_embeddings(
        texts=documents,
        embeddings=vectors,
        metadatas=metadata_list,
        ids=id_list,
        **kwargs,
    )
    return stored_ids
858923
859924 def similarity_search (
860925 self ,
@@ -2162,64 +2227,6 @@ async def _make_async_session(self) -> AsyncGenerator[AsyncSession, None]:
21622227 async with self .session_maker () as session :
21632228 yield typing_cast (AsyncSession , session )
21642229
def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
    """Upsert documents into the vectorstore (sync mode).

    Each document's ``page_content`` is embedded with
    ``self.embedding_function.embed_documents`` and stored through
    ``self.add_embeddings``. Documents without an id get a fresh UUID4
    string.

    Args:
        items: Sequence of documents to upsert.
        kwargs: vectorstore specific parameters, forwarded unchanged to
            ``add_embeddings``.

    Returns:
        UpsertResponse: a dict with ``"succeeded"`` (the ids returned by
        ``add_embeddings``) and ``"failed"`` (the caller-supplied ids that
        are absent from that result).

    Raises:
        AssertionError: If the store was configured with an async engine.
    """
    if self._async_engine:
        raise AssertionError("This method must be called in sync mode.")

    texts = [item.page_content for item in items]
    metadatas = [item.metadata for item in items]
    ids = [item.id if item.id is not None else str(uuid.uuid4()) for item in items]
    embeddings = self.embedding_function.embed_documents(list(texts))
    added_ids = self.add_embeddings(
        texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
    )

    # Build the lookup set once so the failure scan is linear instead of
    # doing a list membership test per document.
    added_lookup = set(added_ids)
    return {
        "succeeded": added_ids,
        "failed": [
            item.id
            for item in items
            if item.id is not None and item.id not in added_lookup
        ],
    }
2192-
async def aupsert(
    self, items: Sequence[Document], /, **kwargs: Any
) -> UpsertResponse:
    """Asynchronously upsert documents into the vectorstore.

    Each document's ``page_content`` is embedded with
    ``self.embedding_function.aembed_documents`` and stored through
    ``self.aadd_embeddings``. Documents without an id get a fresh UUID4
    string.

    Args:
        items: Sequence of documents to upsert.
        kwargs: vectorstore specific parameters, forwarded unchanged to
            ``aadd_embeddings``.

    Returns:
        UpsertResponse: a dict with ``"succeeded"`` (the ids returned by
        ``aadd_embeddings``) and ``"failed"`` (the caller-supplied ids that
        are absent from that result).

    Raises:
        AssertionError: If the store was not configured with an async
            engine.
    """
    if not self._async_engine:
        raise AssertionError("This method must be called with async_mode")

    texts = [item.page_content for item in items]
    metadatas = [item.metadata for item in items]
    ids = [item.id if item.id is not None else str(uuid.uuid4()) for item in items]
    embeddings = await self.embedding_function.aembed_documents(list(texts))
    added_ids = await self.aadd_embeddings(
        texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
    )

    # Build the lookup set once so the failure scan is linear instead of
    # doing a list membership test per document.
    added_lookup = set(added_ids)
    return {
        "succeeded": added_ids,
        "failed": [
            item.id
            for item in items
            if item.id is not None and item.id not in added_lookup
        ],
    }
2222-
22232230 def get_by_ids (self , ids : Sequence [str ], / ) -> List [Document ]:
22242231 """Get documents by ids."""
22252232 documents = []
0 commit comments