neo4j-labs · karanchellani · Mar 19, 2025 · Mar 12, 2025 · Mar 18, 2025
diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -1,63 +1,63 @@
 asyncio==3.4.3
-boto3==1.36.2
-botocore==1.36.2
-certifi==2024.8.30
-fastapi==0.115.6
+boto3==1.37.11
+botocore==1.37.11
+certifi==2025.1.31
+fastapi==0.115.11
 fastapi-health==0.4.0
-google-api-core==2.24.0
-google-auth==2.37.0
+google-api-core==2.24.2
+google-auth==2.38.0
 google_auth_oauthlib==1.2.1
-google-cloud-core==2.4.1
-json-repair==0.30.2
+google-cloud-core==2.4.3
+json-repair==0.30.3
 pip-install==1.3.5
-langchain==0.3.15
-langchain-aws==0.2.11
-langchain-anthropic==0.3.3
-langchain-fireworks==0.2.6
-langchain-community==0.3.15
-langchain-core==0.3.31
+langchain==0.3.20
+langchain-aws==0.2.15
+langchain-anthropic==0.3.9
+langchain-fireworks==0.2.7
+langchain-community==0.3.19
+langchain-core==0.3.45
 langchain-experimental==0.3.4
-langchain-google-vertexai==2.0.11
-langchain-groq==0.2.3
-langchain-openai==0.3.1
-langchain-text-splitters==0.3.5
+langchain-google-vertexai==2.0.15
+langchain-groq==0.2.5
+langchain-openai==0.3.8
+langchain-text-splitters==0.3.6
 langchain-huggingface==0.1.2
 langdetect==1.0.9
-langsmith==0.2.11
+langsmith==0.3.13
 langserve==0.3.1
 neo4j-rust-ext
 nltk==3.9.1
-openai==1.59.9
-opencv-python==4.10.0.84
-psutil==6.1.0
-pydantic==2.9.2
+openai==1.66.2
+opencv-python==4.11.0.86
+psutil==7.0.0
+pydantic==2.10.6
 python-dotenv==1.0.1
 python-magic==0.4.27
 PyPDF2==3.0.1
-PyMuPDF==1.24.14
-starlette==0.41.3
-sse-starlette==2.1.3
+PyMuPDF==1.25.3
+starlette==0.46.1
+sse-starlette==2.2.1
 starlette-session==0.4.3
 tqdm==4.67.1
 unstructured[all-docs]
-unstructured==0.16.11
-unstructured-client==0.28.1
-unstructured-inference==0.8.1
-urllib3==2.2.2
-uvicorn==0.32.1
+unstructured==0.16.25
+unstructured-client==0.31.1
+unstructured-inference==0.8.9
+urllib3==2.3.0
+uvicorn==0.34.0
 gunicorn==23.0.0
 wikipedia==1.4.0
-wrapt==1.16.0
-yarl==1.9.4
-youtube-transcript-api==0.6.3
-zipp==3.17.0
-sentence-transformers==3.3.1
-google-cloud-logging==3.11.3
-pypandoc==1.13
-graphdatascience==1.12
-Secweb==1.11.0
-ragas==0.2.11
+wrapt==1.17.2
+yarl==1.18.3
+youtube-transcript-api==1.0.0
+zipp==3.21.0
+sentence-transformers==3.4.1
+google-cloud-logging==3.11.4
+pypandoc==1.15
+graphdatascience==1.14
+Secweb==1.18.1
+ragas==0.2.14
 rouge_score==0.1.2
-langchain-neo4j==0.3.0
+langchain-neo4j==0.4.0
 pypandoc-binary==1.15
-chardet==5.2.0
+chardet==5.2.0
diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py
@@ -380,7 +380,7 @@ def create_retriever(neo_db, document_names, chat_mode_settings,search_k, score_
         retriever = neo_db.as_retriever(
             search_type="similarity_score_threshold",
             search_kwargs={
-                'k': search_k,
+                'top_k': search_k,
                 'effective_search_ratio': ef_ratio,
                 'score_threshold': score_threshold,
                 'filter': {'fileName': {'$in': document_names}}
@@ -390,7 +390,7 @@ def create_retriever(neo_db, document_names, chat_mode_settings,search_k, score_
     else:
         retriever = neo_db.as_retriever(
             search_type="similarity_score_threshold",
-            search_kwargs={'k': search_k,'effective_search_ratio': ef_ratio, 'score_threshold': score_threshold}
+            search_kwargs={'top_k': search_k,'effective_search_ratio': ef_ratio, 'score_threshold': score_threshold}
         )
         logging.info(f"Successfully created retriever with search_k={search_k}, score_threshold={score_threshold}")
     return retriever

diff --git a/backend/src/document_sources/youtube.py b/backend/src/document_sources/youtube.py
@@ -1,6 +1,7 @@
 from langchain.docstore.document import Document
 from src.shared.llm_graph_builder_exception import LLMGraphBuilderException
 from youtube_transcript_api import YouTubeTranscriptApi 
+from youtube_transcript_api.proxies import GenericProxyConfig
 import logging
 from urllib.parse import urlparse,parse_qs
 from difflib import SequenceMatcher
@@ -12,8 +13,10 @@
 def get_youtube_transcript(youtube_id):
   try:
     proxy = os.environ.get("YOUTUBE_TRANSCRIPT_PROXY") 
-    proxies = { 'https': proxy }
-    transcript_pieces = YouTubeTranscriptApi.get_transcript(youtube_id, proxies = proxies)
+    proxy_config = GenericProxyConfig(http_url=proxy, https_url=proxy) if proxy else None
+    youtube_api = YouTubeTranscriptApi(proxy_config=proxy_config)
+    transcript_pieces = youtube_api.fetch(youtube_id, preserve_formatting=True)
+    transcript_pieces = transcript_pieces.to_raw_data()
     return transcript_pieces
   except Exception as e:
     message = f"Youtube transcript is not available for youtube Id: {youtube_id}"

diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py
@@ -358,12 +358,12 @@
 
     WITH 
     CASE 
-        WHEN e.embedding IS NULL OR ({embedding_match_min} <= vector.similarity.cosine($embedding, e.embedding) AND vector.similarity.cosine($embedding, e.embedding) <= {embedding_match_max}) THEN 
+        WHEN e.embedding IS NULL OR ({embedding_match_min} <= vector.similarity.cosine($query_vector, e.embedding) AND vector.similarity.cosine($query_vector, e.embedding) <= {embedding_match_max}) THEN 
             collect {{
                 OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,1}}(:!Chunk&!Document&!__Community__) 
                 RETURN path LIMIT {entity_limit_minmax_case}
             }}
-        WHEN e.embedding IS NOT NULL AND vector.similarity.cosine($embedding, e.embedding) >  {embedding_match_max} THEN
+        WHEN e.embedding IS NOT NULL AND vector.similarity.cosine($query_vector, e.embedding) >  {embedding_match_max} THEN
             collect {{
                 OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,2}}(:!Chunk&!Document&!__Community__) 
                 RETURN path LIMIT {entity_limit_max_case}