diff --git a/.github/ISSUE_TEMPLATE/bug-report..md b/.github/ISSUE_TEMPLATE/bug-report..md new file mode 100644 index 000000000..46a100d00 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report..md @@ -0,0 +1,25 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +## Guidelines +Please note that GitHub issues are only meant for bug reports/feature requests. + + +Before creating a new issue, please check whether someone else has raised the same issue. You may be able to add context to that issue instead of duplicating the report. However, each issue should also only be focused on a _single_ problem, so do not describe new problems within an existing thread - these are very hard to track and manage, and your problem may be ignored. Finally, do not append comments to closed issues; if the same problem re-occurs, open a new issue, and include a link to the old one. + +To help us understand your issue, please specify important details, primarily: + +- LLM Graph Builder version: X.Y.Z +- Neo4j Database version: X.Y.Z (Community/Enterprise/Aura). + +- **Steps to reproduce** +- Expected behavior +- Actual behavior + +Additionally, include (as appropriate) screenshots, drawings, etc. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 000000000..632385c5d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,25 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: feature +assignees: '' + +--- + +## Guidelines +Please note that GitHub issues are only meant for bug reports/feature requests. + +## Feature request template + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..bce3e3260 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +version: 2 +updates: + - package-ecosystem: 'npm' + directory: '/frontend' + schedule: + interval: 'weekly' + target-branch: 'dev' + + - package-ecosystem: 'pip' + directory: '/backend' + schedule: + interval: 'weekly' + target-branch: 'dev' \ No newline at end of file diff --git a/README.md b/README.md index 42b494ea3..2663a0863 100644 --- a/README.md +++ b/README.md @@ -36,10 +36,6 @@ According to environment we are configuring the models which is indicated by VIT EX: ```env VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" - -You can then run Docker Compose to build and start all components: -```bash -docker-compose up --build ``` #### Additional configs @@ -159,14 +155,14 @@ Allow unauthenticated request : Yes ## LLMs Supported 1. OpenAI 2. Gemini -3. Azure OpenAI(dev) -4. Anthropic(dev) -5. Fireworks(dev) -6. Groq(dev) -7. Amazon Bedrock(dev) -8. Ollama(dev) +3. Azure OpenAI (dev deployed version) +4. Anthropic (dev deployed version) +5. Fireworks (dev deployed version) +6. Groq (dev deployed version) +7. Amazon Bedrock (dev deployed version) +8. 
Ollama (dev deployed version) 9. Diffbot -10. Other OpenAI compabtile baseurl models(dev) +10. Other OpenAI compatible base URL models (dev deployed version) ## For local llms (Ollama) 1. Pull the docker image of ollama ```bash docker pull ollama/ollama ``` @@ -177,23 +173,27 @@ 2. Run the ollama docker image ```bash docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama ``` -3. Execute any llm model ex🦙3 +3. Pull a specific Ollama model. +```bash +ollama pull llama3 +``` +4. Execute any LLM model, e.g. llama3 ```bash docker exec -it ollama ollama run llama3 ``` -4. Configure env variable in docker compose or backend environment. +5. Configure the env variable in docker compose. ```env LLM_MODEL_CONFIG_ollama_ #example LLM_MODEL_CONFIG_ollama_llama3=${LLM_MODEL_CONFIG_ollama_llama3-llama3, http://host.docker.internal:11434} ``` -5. Configure the backend API url +6. Configure the backend API URL ```env VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-backendurl} ``` -6. Open the application in browser and select the ollama model for the extraction. -7. Enjoy Graph Building. +7. Open the application in the browser and select the Ollama model for extraction. +8. Enjoy Graph Building. ## Usage diff --git a/backend/example.env index 9400ad747..4a66791a9 100644 --- a/backend/example.env +++ b/backend/example.env @@ -31,8 +31,10 @@ DEFAULT_DIFFBOT_CHAT_MODEL="openai_gpt_4o" #whichever model specified here , ne LLM_MODEL_CONFIG_openai_gpt_3.5="gpt-3.5-turbo-0125,openai_api_key" LLM_MODEL_CONFIG_openai_gpt_4o_mini="gpt-4o-mini-2024-07-18,openai_api_key" LLM_MODEL_CONFIG_openai_gpt_4o="gpt-4o-2024-11-20,openai_api_key" +LLM_MODEL_CONFIG_openai-gpt-o3-mini="o3-mini-2025-01-31,openai_api_key" LLM_MODEL_CONFIG_gemini_1.5_pro="gemini-1.5-pro-002" LLM_MODEL_CONFIG_gemini_1.5_flash="gemini-1.5-flash-002" +LLM_MODEL_CONFIG_gemini_2.0_flash="gemini-2.0-flash-001" LLM_MODEL_CONFIG_diffbot="diffbot,diffbot_api_key" LLM_MODEL_CONFIG_azure_ai_gpt_35="azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version" LLM_MODEL_CONFIG_azure_ai_gpt_4o="gpt-4o,https://YOUR-ENDPOINT.openai.azure.com/,azure_api_key,api_version" @@ -44,8 +46,10 @@ LLM_MODEL_CONFIG_ollama_llama3="model_name,model_local_url" YOUTUBE_TRANSCRIPT_PROXY="https://user:pass@domain:port" EFFECTIVE_SEARCH_RATIO=5 GRAPH_CLEANUP_MODEL="openai_gpt_4o" -CHUNKS_TO_BE_CREATED="50" BEDROCK_EMBEDDING_MODEL="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.titan-embed-text-v1" LLM_MODEL_CONFIG_bedrock_nova_micro_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-micro-v1:0" LLM_MODEL_CONFIG_bedrock_nova_lite_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-lite-v1:0" LLM_MODEL_CONFIG_bedrock_nova_pro_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-pro-v1:0" +LLM_MODEL_CONFIG_fireworks_deepseek_r1="model_name,fireworks_api_key" #model_name="accounts/fireworks/models/deepseek-r1" +LLM_MODEL_CONFIG_fireworks_deepseek_v3="model_name,fireworks_api_key" #model_name="accounts/fireworks/models/deepseek-v3" +MAX_TOKEN_CHUNK_SIZE=2000 #Maximum number of tokens used to process/extract the file content.
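The new `MAX_TOKEN_CHUNK_SIZE` entry replaces the removed `CHUNKS_TO_BE_CREATED`: instead of a fixed chunk count, the backend now derives the cap on processed chunks from a total token budget. A minimal sketch of that derivation, mirroring the `create_chunks.py` hunk later in this diff (the `token_chunk_size` value of 200 is an assumed example, matching the splitter's previous hard-coded chunk size):

```python
import os

# token_chunk_size normally arrives via the /extract form field; 200 is an
# assumed example value (the splitter's previous hard-coded default).
token_chunk_size = 200

# 10000 is the in-code fallback used by create_chunks.py; the shipped
# example.env suggests 2000 instead.
max_token_budget = int(os.getenv("MAX_TOKEN_CHUNK_SIZE", "10000"))

# Cap on how many chunks are processed per file: the total token budget
# divided by the tokens per chunk.
chunk_to_be_created = max_token_budget // token_chunk_size
print(chunk_to_be_created)  # 50 with the defaults above; 10 with 2000/200
```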
\ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index ee6a49bff..9836a1cb7 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,33 +1,33 @@ asyncio==3.4.3 -boto3==1.35.90 -botocore==1.35.90 +boto3==1.36.2 +botocore==1.36.2 certifi==2024.8.30 fastapi==0.115.6 fastapi-health==0.4.0 -google-api-core==2.23.0 -google-auth==2.36.0 +google-api-core==2.24.0 +google-auth==2.37.0 google_auth_oauthlib==1.2.1 google-cloud-core==2.4.1 json-repair==0.30.2 pip-install==1.3.5 -langchain==0.3.13 -langchain-aws==0.2.10 -langchain-anthropic==0.3.0 -langchain-fireworks==0.2.5 -langchain-community==0.3.13 -langchain-core==0.3.28 +langchain==0.3.15 +langchain-aws==0.2.11 +langchain-anthropic==0.3.3 +langchain-fireworks==0.2.6 +langchain-community==0.3.15 +langchain-core==0.3.31 langchain-experimental==0.3.4 -langchain-google-vertexai==2.0.7 -langchain-groq==0.2.1 -langchain-openai==0.2.14 -langchain-text-splitters==0.3.4 +langchain-google-vertexai==2.0.11 +langchain-groq==0.2.3 +langchain-openai==0.3.1 +langchain-text-splitters==0.3.5 langchain-huggingface==0.1.2 langdetect==1.0.9 -langsmith==0.2.4 -langserve==0.3.0 +langsmith==0.2.11 +langserve==0.3.1 neo4j-rust-ext nltk==3.9.1 -openai==1.58.1 +openai==1.59.9 opencv-python==4.10.0.84 psutil==6.1.0 pydantic==2.9.2 @@ -56,7 +56,6 @@ google-cloud-logging==3.11.3 pypandoc==1.13 graphdatascience==1.12 Secweb==1.11.0 -ragas==0.2.6 +ragas==0.2.11 rouge_score==0.1.2 -langchain-neo4j==0.2.0 - +langchain-neo4j==0.3.0 diff --git a/backend/score.py b/backend/score.py index e309fa9fc..7e7ceff7b 100644 --- a/backend/score.py +++ b/backend/score.py @@ -11,14 +11,14 @@ from langchain_google_vertexai import ChatVertexAI from src.api_response import create_api_response from src.graphDB_dataAccess import graphDBdataAccess -from src.graph_query import get_graph_results,get_chunktext_results +from src.graph_query import get_graph_results,get_chunktext_results,visualize_schema from src.chunkid_entities import get_entities_from_chunkids from src.post_processing import create_vector_fulltext_indexes, create_entity_embedding, graph_schema_consolidation from sse_starlette.sse import EventSourceResponse from src.communities import create_communities from src.neighbours import get_neighbour_nodes import json -from typing import List +from typing import List, Optional from google.oauth2.credentials import Credentials import os from src.logger import CustomLogger @@ -82,7 +82,7 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send): app = FastAPI() app.add_middleware(XContentTypeOptions) app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) -app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext"]) +app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext","/schema_visualization"]) app.add_middleware( CORSMiddleware, allow_origins=["*"], @@ -101,11 +101,11 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send): @app.post("/url/scan") async def create_source_knowledge_graph_url( - uri=Form(), - userName=Form(), - 
password=Form(), + uri=Form(None), + userName=Form(None), + password=Form(None), source_url=Form(None), - database=Form(), + database=Form(None), aws_access_key_id=Form(None), aws_secret_access_key=Form(None), wiki_query=Form(None), @@ -115,7 +115,7 @@ async def create_source_knowledge_graph_url( source_type=Form(None), gcs_project_id=Form(None), access_token=Form(None), - email=Form() + email=Form(None) ): try: @@ -172,11 +172,11 @@ async def create_source_knowledge_graph_url( @app.post("/extract") async def extract_knowledge_graph_from_file( - uri=Form(), - userName=Form(), - password=Form(), + uri=Form(None), + userName=Form(None), + password=Form(None), model=Form(), - database=Form(), + database=Form(None), source_url=Form(None), aws_access_key_id=Form(None), aws_secret_access_key=Form(None), @@ -189,11 +189,14 @@ async def extract_knowledge_graph_from_file( file_name=Form(None), allowedNodes=Form(None), allowedRelationship=Form(None), + token_chunk_size: Optional[int] = Form(None), + chunk_overlap: Optional[int] = Form(None), + chunks_to_combine: Optional[int] = Form(None), language=Form(None), access_token=Form(None), retry_condition=Form(None), additional_instructions=Form(None), - email=Form() + email=Form(None) ): """ Calls 'extract_graph_from_file' in a new thread to create Neo4jGraph from a @@ -215,22 +218,22 @@ async def extract_knowledge_graph_from_file( graphDb_data_Access = graphDBdataAccess(graph) merged_file_path = os.path.join(MERGED_DIR,file_name) if source_type == 'local file': - uri_latency, result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions) elif source_type == 's3 bucket' and source_url: - uri_latency, result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions) elif source_type == 'web-url': - uri_latency, result = await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions) elif source_type == 'youtube' and source_url: - uri_latency, result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, 
additional_instructions) elif source_type == 'Wikipedia' and wiki_query: - uri_latency, result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions) elif source_type == 'gcs bucket' and gcs_bucket_name: - uri_latency, result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions) else: return create_api_response('Failed',message='source_type is other than accepted source') extract_api_time = time.time() - start_time @@ -278,7 +281,7 @@ async def extract_knowledge_graph_from_file( failed_file_process(uri,file_name, merged_file_path, source_type) node_detail = graphDb_data_Access.get_current_status_document_node(file_name) # Set the status "Completed" in logging because we are treating these errors, already handled by the application, as custom errors. - json_obj = {'api_name':'extract','message':error_message,'file_created_at':node_detail[0]['created_time'],'error_message':error_message, 'file_name': file_name,'status':'Completed', + json_obj = {'api_name':'extract','message':error_message,'file_created_at':formatted_time(node_detail[0]['created_time']),'error_message':error_message, 'file_name': file_name,'status':'Completed', 'db_url':uri, 'userName':userName, 'database':database,'success_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc)),'email':email} logger.log_struct(json_obj, "INFO") return create_api_response("Failed", message = error_message, error=error_message, file_name=file_name) @@ -289,7 +292,7 @@ async def extract_knowledge_graph_from_file( failed_file_process(uri,file_name, merged_file_path, source_type) node_detail = graphDb_data_Access.get_current_status_document_node(file_name) - json_obj = {'api_name':'extract','message':message,'file_created_at':node_detail[0]['created_time'],'error_message':error_message, 'file_name': file_name,'status':'Failed', + json_obj = {'api_name':'extract','message':message,'file_created_at':formatted_time(node_detail[0]['created_time']),'error_message':error_message, 'file_name': file_name,'status':'Failed', 'db_url':uri, 'userName':userName, 'database':database,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc)),'email':email} logger.log_struct(json_obj, "ERROR") return create_api_response('Failed', message=message + error_message[:100], error=error_message, file_name = file_name) @@ -297,13 +300,18 @@ gc.collect() @app.get("/sources_list") -async def get_source_list(uri:str, userName:str, 
password:str, email:str, database:str=None): +async def get_source_list(uri:str=None, userName:str=None, password:str=None, email:str=None, database:str=None): """ Calls 'get_source_list_from_graph' which returns list of sources which already exist in database """ try: start = time.time() - decoded_password = decode_password(password) + if password is not None and password != "null": + decoded_password = decode_password(password) + else: + decoded_password = None + userName = None + database = None if " " in uri: uri = uri.replace(" ","+") result = await asyncio.to_thread(get_source_list_from_graph,uri,userName,decoded_password,database) @@ -320,7 +328,7 @@ return create_api_response(job_status, message=message, error=error_message) @app.post("/post_processing") -async def post_processing(uri=Form(), userName=Form(), password=Form(), database=Form(), tasks=Form(None), email=Form()): +async def post_processing(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None), tasks=Form(None), email=Form(None)): try: graph = create_graph_database_connection(uri, userName, password, database) tasks = set(map(str.strip, json.loads(tasks))) @@ -377,7 +385,7 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database gc.collect() @app.post("/chat_bot") -async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), database=Form(),question=Form(None), document_names=Form(None),session_id=Form(None),mode=Form(None),email=Form()): +async def chat_bot(uri=Form(None),model=Form(None),userName=Form(None), password=Form(None), database=Form(None),question=Form(None), document_names=Form(None),session_id=Form(None),mode=Form(None),email=Form(None)): logging.info(f"QA_RAG called at {datetime.now()}") qa_rag_start_time = time.time() try: @@ -409,10 +417,10 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), gc.collect() @app.post("/chunk_entities") -async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), nodedetails=Form(None),entities=Form(),mode=Form(),email=Form()): +async def chunk_entities(uri=Form(None),userName=Form(None), password=Form(None), database=Form(None), nodedetails=Form(None),entities=Form(),mode=Form(),email=Form(None)): try: start = time.time() - result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,nodedetails=nodedetails,entities=entities,mode=mode) + result = await asyncio.to_thread(get_entities_from_chunkids,nodedetails=nodedetails,entities=entities,mode=mode,uri=uri, username=userName, password=password, database=database) end = time.time() elapsed_time = end - start json_obj = {'api_name':'chunk_entities','db_url':uri, 'userName':userName, 'database':database, 'nodedetails':nodedetails,'entities':entities, @@ -429,7 +437,7 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=F gc.collect() @app.post("/get_neighbours") -async def get_neighbours(uri=Form(),userName=Form(), password=Form(), database=Form(), elementId=Form(None),email=Form()): +async def get_neighbours(uri=Form(None),userName=Form(None), password=Form(None), database=Form(None), elementId=Form(None),email=Form(None)): try: start = time.time() result = await asyncio.to_thread(get_neighbour_nodes,uri=uri, username=userName, password=password,database=database, element_id=elementId) @@ -449,12 +457,12 @@ async def 
get_neighbours(uri=Form(),userName=Form(), password=Form(), database=F @app.post("/graph_query") async def graph_query( - uri: str = Form(), - database: str = Form(), - userName: str = Form(), - password: str = Form(), + uri: str = Form(None), + database: str = Form(None), + userName: str = Form(None), + password: str = Form(None), document_names: str = Form(None), - email=Form() + email=Form(None) ): try: start = time.time() @@ -482,7 +490,7 @@ async def graph_query( @app.post("/clear_chat_bot") -async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=Form(), session_id=Form(None),email=Form()): +async def clear_chat_bot(uri=Form(None),userName=Form(None), password=Form(None), database=Form(None), session_id=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -502,20 +510,18 @@ async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=F gc.collect() @app.post("/connect") -async def connect(uri=Form(), userName=Form(), password=Form(), database=Form(),email=Form()): +async def connect(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph, database) gcs_file_cache = os.environ.get('GCS_FILE_CACHE') - chunk_to_be_created = int(os.environ.get('CHUNKS_TO_BE_CREATED', '50')) end = time.time() elapsed_time = end - start json_obj = {'api_name':'connect','db_url':uri, 'userName':userName, 'database':database, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}','email':email} logger.log_struct(json_obj, "INFO") result['elapsed_api_time'] = f'{elapsed_time:.2f}' result['gcs_file_cache'] = gcs_file_cache - result['chunk_to_be_created']= chunk_to_be_created return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -526,8 +532,8 @@ async def connect(uri=Form(), userName=Form(), password=Form(), database=Form(), @app.post("/upload") async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber=Form(None), totalChunks=Form(None), - originalname=Form(None), model=Form(None), uri=Form(), userName=Form(), - password=Form(), database=Form(),email=Form()): + originalname=Form(None), model=Form(None), uri=Form(None), userName=Form(None), + password=Form(None), database=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -551,7 +557,7 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber gc.collect() @app.post("/schema") -async def get_structured_schema(uri=Form(), userName=Form(), password=Form(), database=Form(),email=Form()): +async def get_structured_schema(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -582,14 +588,20 @@ def encode_password(pwd): return encoded_pwd_bytes @app.get("/update_extract_status/{file_name}") -async def update_extract_status(request:Request, file_name, url, userName, password, database): +async def update_extract_status(request: Request, file_name: str, uri:str=None, userName:str=None, password:str=None, database:str=None): async def generate(): status = '' 
- decoded_password = decode_password(password) - uri = url - if " " in url: - uri= url.replace(" ","+") - graph = create_graph_database_connection(uri, userName, decoded_password, database) + + if password is not None and password != "null": + decoded_password = decode_password(password) + else: + decoded_password = None + + url = uri + if url and " " in url: + url= url.replace(" ","+") + + graph = create_graph_database_connection(url, userName, decoded_password, database) graphDb_data_Access = graphDBdataAccess(graph) while True: try: @@ -625,14 +637,14 @@ async def generate(): return EventSourceResponse(generate(),ping=60) @app.post("/delete_document_and_entities") -async def delete_document_and_entities(uri=Form(), - userName=Form(), - password=Form(), - database=Form(), +async def delete_document_and_entities(uri=Form(None), + userName=Form(None), + password=Form(None), + database=Form(None), filenames=Form(), source_types=Form(), deleteEntities=Form(), - email=Form()): + email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -695,7 +707,7 @@ async def get_document_status(file_name, url, userName, password, database): return create_api_response('Failed',message=message) @app.post("/cancelled_job") -async def cancelled_job(uri=Form(), userName=Form(), password=Form(), database=Form(), filenames=Form(None), source_types=Form(None),email=Form()): +async def cancelled_job(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None), filenames=Form(None), source_types=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -716,7 +728,7 @@ async def cancelled_job(uri=Form(), userName=Form(), password=Form(), database=F gc.collect() @app.post("/populate_graph_schema") -async def populate_graph_schema(input_text=Form(None), model=Form(None), is_schema_description_checked=Form(None),email=Form()): +async def populate_graph_schema(input_text=Form(None), model=Form(None), is_schema_description_checked=Form(None),email=Form(None)): try: start = time.time() result = populate_graph_schema_from_text(input_text, model, is_schema_description_checked) @@ -735,7 +747,7 @@ async def populate_graph_schema(input_text=Form(None), model=Form(None), is_sche gc.collect() @app.post("/get_unconnected_nodes_list") -async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form(), database=Form(),email=Form()): +async def get_unconnected_nodes_list(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -756,7 +768,7 @@ async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form( gc.collect() @app.post("/delete_unconnected_nodes") -async def delete_orphan_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(),unconnected_entities_list=Form(),email=Form()): +async def delete_orphan_nodes(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),unconnected_entities_list=Form(),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -777,7 +789,7 @@ async def delete_orphan_nodes(uri=Form(), userName=Form(), password=Form(), data gc.collect() @app.post("/get_duplicate_nodes") -async def get_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), 
database=Form(),email=Form()): +async def get_duplicate_nodes(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -798,7 +810,7 @@ async def get_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), data gc.collect() @app.post("/merge_duplicate_nodes") -async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(),duplicate_nodes_list=Form(),email=Form()): +async def merge_duplicate_nodes(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),duplicate_nodes_list=Form(),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -820,7 +832,7 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da gc.collect() @app.post("/drop_create_vector_index") -async def drop_create_vector_index(uri=Form(), userName=Form(), password=Form(), database=Form(), isVectorIndexExist=Form(),email=Form()): +async def drop_create_vector_index(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None), isVectorIndexExist=Form(),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -842,7 +854,7 @@ async def drop_create_vector_index(uri=Form(), userName=Form(), password=Form(), gc.collect() @app.post("/retry_processing") -async def retry_processing(uri=Form(), userName=Form(), password=Form(), database=Form(), file_name=Form(), retry_condition=Form(), email=Form()): +async def retry_processing(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None), file_name=Form(), retry_condition=Form(), email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -938,13 +950,13 @@ async def calculate_additional_metrics(question: str = Form(), @app.post("/fetch_chunktext") async def fetch_chunktext( - uri: str = Form(), - database: str = Form(), - userName: str = Form(), - password: str = Form(), + uri: str = Form(None), + database: str = Form(None), + userName: str = Form(None), + password: str = Form(None), document_name: str = Form(), page_no: int = Form(1), - email=Form() + email=Form(None) ): try: start = time.time() @@ -991,26 +1003,21 @@ async def backend_connection_configuration(): database= os.getenv('NEO4J_DATABASE') password= os.getenv('NEO4J_PASSWORD') gcs_file_cache = os.environ.get('GCS_FILE_CACHE') - chunk_to_be_created = int(os.environ.get('CHUNKS_TO_BE_CREATED', '50')) if all([uri, username, database, password]): graph = Neo4jGraph() logging.info(f'login connection status of object: {graph}') if graph is not None: graph_connection = True - encoded_password = encode_password(password) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.connection_check_and_get_vector_dimensions(database) - result["graph_connection"] = graph_connection - result["uri"] = uri - result["user_name"] = username - result["database"] = database - result["password"] = encoded_password result['gcs_file_cache'] = gcs_file_cache - result['chunk_to_be_created']= chunk_to_be_created + result['uri'] = uri end = time.time() elapsed_time = end - start result['api_name'] = 'backend_connection_configuration' result['elapsed_api_time'] = f'{elapsed_time:.2f}' + result['graph_connection'] = f'{graph_connection}' + result['connection_from'] = 'backendAPI' 
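The recurring `Form()` to `Form(None)` change across these endpoints is what makes the connection fields optional: FastAPI rejects a request with a 422 when a bare `Form()` field is missing, while `Form(None)` lets the client omit it so the backend can fall back to the `NEO4J_*` environment variables, as the `get_graphDB_driver` and `get_gds_driver` hunks below do. A minimal sketch of the pattern on a hypothetical `/example` route:

```python
import os
from typing import Optional
from fastapi import FastAPI, Form

app = FastAPI()

# A bare Form() makes the field required; Form(None) makes it optional,
# so the handler can substitute server-side defaults when it is omitted.
@app.post("/example")
async def example(uri: Optional[str] = Form(None)):
    # Fall back to the environment, mirroring get_graphDB_driver.
    return {"uri": uri or os.getenv("NEO4J_URI")}
```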
logger.log_struct(result, "INFO") return create_api_response('Success',message=f"Backend connection successful",data=result) else: @@ -1025,6 +1032,32 @@ async def backend_connection_configuration(): return create_api_response(job_status, message=message, error=error_message.rstrip('.') + ', or fill from the login dialog.', data=graph_connection) finally: gc.collect() - + +@app.post("/schema_visualization") +async def get_schema_visualization(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None)): + try: + start = time.time() + result = await asyncio.to_thread(visualize_schema, + uri=uri, + userName=userName, + password=password, + database=database) + if result: + logging.info("Graph schema visualization query successful") + end = time.time() + elapsed_time = end - start + logging.info(f'Schema result from DB: {result}') + json_obj = {'api_name':'schema_visualization','db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") + return create_api_response('Success', data=result,message=f"Total elapsed API time {elapsed_time:.2f}") + except Exception as e: + message="Unable to get schema visualization from neo4j database" + error_message = str(e) + logging.info(message) + logging.exception(f'Exception:{error_message}') + return create_api_response("Failed", message=message, error=error_message) + finally: + gc.collect() + if __name__ == "__main__": uvicorn.run(app) \ No newline at end of file diff --git a/backend/src/communities.py b/backend/src/communities.py index a38b39696..0ecf493cc 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -193,6 +193,11 @@ def get_gds_driver(uri, username, password, database): try: + if all(v is None for v in [username, password]): + username= os.getenv('NEO4J_USERNAME') + database= os.getenv('NEO4J_DATABASE') + password= os.getenv('NEO4J_PASSWORD') + gds = GraphDataScience( endpoint=uri, auth=(username, password), diff --git a/backend/src/create_chunks.py b/backend/src/create_chunks.py index 0a1657568..523d2b77c 100644 --- a/backend/src/create_chunks.py +++ b/backend/src/create_chunks.py @@ -14,7 +14,7 @@ def __init__(self, pages: list[Document], graph: Neo4jGraph): self.pages = pages self.graph = graph - def split_file_into_chunks(self): + def split_file_into_chunks(self,token_chunk_size, chunk_overlap): """ Split a list of documents(file pages) into chunks of fixed size. @@ -25,8 +25,10 @@ def split_file_into_chunks(self): A list of chunks each of which is a langchain Document. 
""" logging.info("Split file into smaller chunks") - text_splitter = TokenTextSplitter(chunk_size=200, chunk_overlap=20) - chunk_to_be_created = int(os.environ.get('CHUNKS_TO_BE_CREATED', '50')) + text_splitter = TokenTextSplitter(chunk_size=token_chunk_size, chunk_overlap=chunk_overlap) + MAX_TOKEN_CHUNK_SIZE = int(os.getenv('MAX_TOKEN_CHUNK_SIZE', 10000)) + chunk_to_be_created = int(MAX_TOKEN_CHUNK_SIZE / token_chunk_size) + if 'page' in self.pages[0].metadata: chunks = [] for i, document in enumerate(self.pages): diff --git a/backend/src/diffbot_transformer.py b/backend/src/diffbot_transformer.py index 03e1ba69e..ccac42144 100644 --- a/backend/src/diffbot_transformer.py +++ b/backend/src/diffbot_transformer.py @@ -8,4 +8,4 @@ def get_graph_from_diffbot(graph,chunkId_chunkDoc_list:List): combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list) llm,model_name = get_llm('diffbot') graph_documents = llm.convert_to_graph_documents(combined_chunk_document_list) - return graph_documents \ No newline at end of file + return graph_documents diff --git a/backend/src/document_sources/local_file.py b/backend/src/document_sources/local_file.py index f674a202f..47e12ab48 100644 --- a/backend/src/document_sources/local_file.py +++ b/backend/src/document_sources/local_file.py @@ -62,4 +62,4 @@ def get_pages_with_page_numbers(unstructured_pages): 'filetype':page.metadata['filetype']} if page == unstructured_pages[-1]: pages.append(Document(page_content = page_content, metadata=metadata_with_custom_page_number)) - return pages \ No newline at end of file + return pages diff --git a/backend/src/document_sources/web_pages.py b/backend/src/document_sources/web_pages.py index 91c87510c..cdc0fb76a 100644 --- a/backend/src/document_sources/web_pages.py +++ b/backend/src/document_sources/web_pages.py @@ -6,11 +6,11 @@ def get_documents_from_web_page(source_url:str): try: pages = WebBaseLoader(source_url, verify_ssl=False).load() try: - file_name = pages[0].metadata['title'] + file_name = pages[0].metadata['title'].strip() if not file_name: file_name = last_url_segment(source_url) except: file_name = last_url_segment(source_url) return file_name, pages except Exception as e: - raise LLMGraphBuilderException(str(e)) \ No newline at end of file + raise LLMGraphBuilderException(str(e)) diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index aefaacbd1..f8054061f 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -3,7 +3,8 @@ from neo4j import GraphDatabase import os import json -from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY + +from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY,SCHEMA_VISUALIZATION_QUERY def get_graphDB_driver(uri, username, password,database="neo4j"): """ @@ -15,6 +16,11 @@ def get_graphDB_driver(uri, username, password,database="neo4j"): """ try: logging.info(f"Attempting to connect to the Neo4j database at {uri}") + if all(v is None for v in [username, password]): + username= os.getenv('NEO4J_USERNAME') + database= os.getenv('NEO4J_DATABASE') + password= os.getenv('NEO4J_PASSWORD') + enable_user_agent = os.environ.get("ENABLE_USER_AGENT", "False").lower() in ("true", "1", "yes") if enable_user_agent: driver = GraphDatabase.driver(uri, auth=(username, password),database=database, user_agent=os.environ.get('NEO4J_USER_AGENT')) @@ -228,7 +234,7 @@ def get_chunktext_results(uri, username, password, database, document_name, page offset = 10 
skip = (page_no - 1) * offset limit = offset - driver = GraphDatabase.driver(uri, auth=(username, password)) + driver = get_graphDB_driver(uri, username, password,database) with driver.session(database=database) as session: total_chunks_result = session.run(COUNT_CHUNKS_QUERY, file_name=document_name) total_chunks = total_chunks_result.single()["total_chunks"] @@ -252,4 +258,23 @@ def get_chunktext_results(uri, username, password, database, document_name, page raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e finally: if driver: - driver.close() \ No newline at end of file + driver.close() + + +def visualize_schema(uri, userName, password, database): + """Retrieves graph schema""" + driver = None + try: + logging.info("Starting visualizing graph schema") + driver = get_graphDB_driver(uri, userName, password,database) + records, summary, keys = driver.execute_query(SCHEMA_VISUALIZATION_QUERY) + nodes = records[0].get("nodes", []) + relationships = records[0].get("relationships", []) + result = {"nodes": nodes, "relationships": relationships} + return result + except Exception as e: + logging.error(f"An error occurred during schema retrieval. Error: {str(e)}") + raise Exception(f"An error occurred during schema retrieval. Error: {str(e)}") + finally: + if driver: + driver.close() diff --git a/backend/src/llm.py b/backend/src/llm.py index ab49534ac..c1a5f3eeb 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -47,11 +47,16 @@ def get_llm(model: str): ) elif "openai" in model: model_name, api_key = env_value.split(",") - llm = ChatOpenAI( + if "o3-mini" in model: + llm = ChatOpenAI( + api_key=api_key, + model=model_name) + else: + llm = ChatOpenAI( api_key=api_key, model=model_name, temperature=0, - ) + ) elif "azure" in model: model_name, api_endpoint, api_key, api_version = env_value.split(",") @@ -121,9 +126,7 @@ def get_llm(model: str): return llm, model_name -def get_combined_chunks(chunkId_chunkDoc_list): - chunks_to_combine = int(os.environ.get("NUMBER_OF_CHUNKS_TO_COMBINE")) - logging.info(f"Combining {chunks_to_combine} chunks before sending request to LLM") +def get_combined_chunks(chunkId_chunkDoc_list, chunks_to_combine): combined_chunk_document_list = [] combined_chunks_page_content = [ "".join( @@ -190,11 +193,10 @@ async def get_graph_document_list( graph_document_list = await llm_transformer.aconvert_to_graph_documents(combined_chunk_document_list) return graph_document_list - -async def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship, additional_instructions=None): +async def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship, chunks_to_combine, additional_instructions=None): try: llm, model_name = get_llm(model) - combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list) + combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list, chunks_to_combine) if allowedNodes is None or allowedNodes=="": allowedNodes =[] diff --git a/backend/src/main.py b/backend/src/main.py index d5d9a143f..4359ad51f 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -123,7 +123,7 @@ def create_source_node_graph_web_url(graph, model, source_url, source_type): message = f"Unable to read data for given url : {source_url}" raise LLMGraphBuilderException(message) try: - title = pages[0].metadata['title'] + title = pages[0].metadata['title'].strip() if not title: title = last_url_segment(source_url) language = pages[0].metadata['language'] @@ 
-221,7 +221,7 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type lst_file_name.append({'fileName':obj_source_node.file_name,'fileSize':obj_source_node.file_size,'url':obj_source_node.url, 'language':obj_source_node.language, 'status':'Success'}) return lst_file_name,success_count,failed_count -async def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): logging.info(f'Process file name :{fileName}') if not retry_condition: @@ -233,11 +233,11 @@ async def extract_graph_from_file_local_file(uri, userName, password, database, file_name, pages, file_extension = get_documents_from_file_by_path(merged_file_path,fileName) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'File content is not available for file : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, True, merged_file_path) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, True, merged_file_path) else: - return await processing_source(uri, userName, password, database, model, fileName, [], allowedNodes, allowedRelationship, True, merged_file_path, retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, fileName, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, True, merged_file_path, retry_condition, additional_instructions=additional_instructions) -async def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: if(aws_access_key_id==None or aws_secret_access_key==None): raise LLMGraphBuilderException('Please provide AWS access and secret keys') @@ -247,48 +247,48 @@ async def extract_graph_from_file_s3(uri, userName, password, database, model, s if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'File content is not available for file : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine) else: - return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, 
chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions) -async def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: file_name, pages = get_documents_from_web_page(source_url) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'Content is not available for given URL : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine) else: - return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions) -async def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: file_name, pages = get_documents_from_youtube(source_url) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'Youtube transcript is not available for file : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine) else: - return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions) -async def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: file_name, pages = get_documents_from_Wikipedia(wiki_query, language) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'Wikipedia page is 
not available for file : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine) else: - return await processing_source(uri, userName, password, database, model, file_name,[], allowedNodes, allowedRelationship, retry_condition=retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, file_name,[], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions) -async def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: file_name, pages = get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'File content is not available for file : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine) else: - return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions) -async def processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, is_uploaded_from_local=None, merged_file_path=None, retry_condition=None, additional_instructions=None): +async def processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, is_uploaded_from_local=None, merged_file_path=None, retry_condition=None, additional_instructions=None): """ Extracts a Neo4jGraph from a PDF file based on the model. 
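Every extract wrapper above now threads `token_chunk_size` and `chunk_overlap` through `processing_source` down to the splitter changed in the `create_chunks.py` hunk. A self-contained sketch of what that splitter call does with those two values (the sample page text and the 200/20 values are assumptions for illustration):

```python
from langchain_core.documents import Document
from langchain_text_splitters import TokenTextSplitter

# Hypothetical single-page input; the real pipeline passes the loaded file pages.
pages = [Document(page_content="Neo4j stores data as nodes and relationships. " * 200)]

token_chunk_size = 200  # tokens per chunk, from the /extract form field
chunk_overlap = 20      # tokens shared between consecutive chunks

splitter = TokenTextSplitter(chunk_size=token_chunk_size, chunk_overlap=chunk_overlap)
chunks = splitter.split_documents(pages)
print(f"{len(chunks)} chunks created")
```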
@@ -317,7 +317,7 @@ async def processing_source(uri, userName, password, database, model, file_name, graphDb_data_Access = graphDBdataAccess(graph) create_chunk_vector_index(graph) start_get_chunkId_chunkDoc_list = time.time() - total_chunks, chunkId_chunkDoc_list = get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition) + total_chunks, chunkId_chunkDoc_list = get_chunkId_chunkDoc_list(graph, file_name, pages, token_chunk_size, chunk_overlap, retry_condition) end_get_chunkId_chunkDoc_list = time.time() elapsed_get_chunkId_chunkDoc_list = end_get_chunkId_chunkDoc_list - start_get_chunkId_chunkDoc_list logging.info(f'Time taken to create list chunkids with chunk document: {elapsed_get_chunkId_chunkDoc_list:.2f} seconds') @@ -380,7 +380,7 @@ async def processing_source(uri, userName, password, database, model, file_name, break else: processing_chunks_start_time = time.time() - node_count,rel_count,latency_processed_chunk = await processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count, additional_instructions) + node_count,rel_count,latency_processed_chunk = await processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,chunks_to_combine,node_count, rel_count, additional_instructions) processing_chunks_end_time = time.time() processing_chunks_elapsed_end_time = processing_chunks_end_time - processing_chunks_start_time logging.info(f"Time taken {update_graph_chunk_processed} chunks processed upto {select_chunks_upto} completed in {processing_chunks_elapsed_end_time:.2f} seconds for file name {file_name}") @@ -457,7 +457,7 @@ async def processing_source(uri, userName, password, database, model, file_name, logging.error(error_message) raise LLMGraphBuilderException(error_message) -async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count, additional_instructions=None): +async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, chunks_to_combine, node_count, rel_count, additional_instructions=None): #create vector index and update chunk node with embedding latency_processing_chunk = {} if graph is not None: @@ -475,7 +475,7 @@ async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, logging.info("Get graph document list from models") start_entity_extraction = time.time() - graph_documents = await get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship, additional_instructions) + graph_documents = await get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship, chunks_to_combine, additional_instructions) end_entity_extraction = time.time() elapsed_entity_extraction = end_entity_extraction - start_entity_extraction logging.info(f'Time taken to extract entities from LLM Graph Builder: {elapsed_entity_extraction:.2f} seconds') @@ -515,7 +515,7 @@ async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, rel_count += len(relations) return node_count,rel_count,latency_processing_chunk -def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): +def get_chunkId_chunkDoc_list(graph, file_name, pages, token_chunk_size, chunk_overlap, retry_condition): if not retry_condition: logging.info("Break down file into chunks") bad_chars = ['"', "\n", "'"] 
@@ -528,7 +528,7 @@ def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): text = text.replace(j, '') pages[i]=Document(page_content=str(text), metadata=pages[i].metadata) create_chunks_obj = CreateChunksofDocument(pages, graph) - chunks = create_chunks_obj.split_file_into_chunks() + chunks = create_chunks_obj.split_file_into_chunks(token_chunk_size, chunk_overlap) chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks) return len(chunks), chunkId_chunkDoc_list @@ -673,11 +673,11 @@ def get_labels_and_relationtypes(graph): query = """ RETURN collect { CALL db.labels() yield label - WHERE NOT label IN ['Document','Chunk','_Bloom_Perspective_', '__Community__', '__Entity__'] + WHERE NOT label IN ['Document','Chunk','_Bloom_Perspective_', '__Community__', '__Entity__', 'Session', 'Message'] return label order by label limit 100 } as labels, collect { CALL db.relationshipTypes() yield relationshipType as type - WHERE NOT type IN ['PART_OF', 'NEXT_CHUNK', 'HAS_ENTITY', '_Bloom_Perspective_','FIRST_CHUNK','SIMILAR','IN_COMMUNITY','PARENT_COMMUNITY'] + WHERE NOT type IN ['PART_OF', 'NEXT_CHUNK', 'HAS_ENTITY', '_Bloom_Perspective_','FIRST_CHUNK','SIMILAR','IN_COMMUNITY','PARENT_COMMUNITY', 'NEXT', 'LAST_MESSAGE'] return type order by type LIMIT 100 } as relationshipTypes """ graphDb_data_Access = graphDBdataAccess(graph) diff --git a/backend/src/make_relationships.py b/backend/src/make_relationships.py index 333f0c550..bccfa1ddd 100644 --- a/backend/src/make_relationships.py +++ b/backend/src/make_relationships.py @@ -161,7 +161,8 @@ def create_chunk_vector_index(graph): graph=graph, node_label="Chunk", embedding_node_property="embedding", - index_name="vector" + index_name="vector", + embedding_dimension=EMBEDDING_DIMENSION ) vector_store.create_new_index() logging.info(f"Index created successfully. Time taken: {time.time() - start_time:.2f} seconds") diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index ccc3437f5..cdc8b06d3 100644 --- a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -3,6 +3,7 @@ import time from langchain_neo4j import Neo4jGraph import os +from src.graph_query import get_graphDB_driver from src.shared.common_fn import load_embedding_model from langchain_core.output_parsers import JsonOutputParser from langchain_core.prompts import ChatPromptTemplate @@ -137,7 +138,7 @@ def create_vector_fulltext_indexes(uri, username, password, database): logging.info("Starting the process of creating full-text indexes.") try: - driver = GraphDatabase.driver(uri, auth=(username, password), database=database) + driver = get_graphDB_driver(uri, username, password,database) driver.verify_connectivity() logging.info("Database connectivity verified.") except Exception as e: @@ -233,6 +234,3 @@ def graph_schema_consolidation(graph): graph.query(query) return None - - - \ No newline at end of file diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index eeb603245..b85654c8c 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -1,4 +1,3 @@ - OPENAI_MODELS = ["openai-gpt-3.5", "openai-gpt-4o", "openai-gpt-4o-mini"] GEMINI_MODELS = ["gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash"] GROQ_MODELS = ["groq-llama3"] @@ -893,3 +892,19 @@ types such as dates, numbers, revenues, and other non-entity information are not extracted as separate nodes. 
Instead, treat these as properties associated with the relevant entities.""" +SCHEMA_VISUALIZATION_QUERY = """ +CALL db.schema.visualization() YIELD nodes, relationships +RETURN + [n IN nodes | { + element_id: elementId(n), + labels: labels(n), + properties: apoc.any.properties(n) + }] AS nodes, + [r IN relationships | { + type: type(r), + properties: apoc.any.properties(r), + element_id: elementId(r), + start_node_element_id: elementId(startNode(r)), + end_node_element_id: elementId(endNode(r)) + }] AS relationships; +""" diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 03d0d470b..2d7a5c5e5 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -6,28 +6,24 @@ import pandas as pd from datetime import datetime as dt from dotenv import load_dotenv -# from score import * from src.main import * from src.QA_integration import QA_RAG -from langserve import add_routes from src.ragas_eval import get_ragas_metrics from datasets import Dataset -from ragas import evaluate -# from ragas.metrics import answer_relevancy, context_utilization, faithfulness -# from ragas.dataset_schema import SingleTurnSample -# Load environment variables if needed +# Load environment variables load_dotenv() -import os URI = os.getenv('NEO4J_URI') USERNAME = os.getenv('NEO4J_USERNAME') PASSWORD = os.getenv('NEO4J_PASSWORD') DATABASE = os.getenv('NEO4J_DATABASE') - -CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") -MERGED_DIR = os.path.join(os.path.dirname(__file__), "merged_files") - -# Initialize database connection -graph = create_graph_database_connection(URI,USERNAME,PASSWORD,DATABASE) +# Logging configuration +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +# Directory Paths +BASE_DIR = os.path.dirname(__file__) +CHUNK_DIR = os.path.join(BASE_DIR, "chunks") +MERGED_DIR = os.path.join(BASE_DIR, "merged_files") +# Initialize Neo4j connection +graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) def create_source_node_local(graph, model, file_name): """Creates a source node for a local file.""" @@ -42,151 +38,130 @@ def create_source_node_local(graph, model, file_name): graphDB_data_Access.create_source_node(source_node) return source_node -def delete_extracted_files(file_path): - """Delete the extracted files once extraction process is completed""" - try: - if os.path.exists(file_path): - os.remove(file_path) - logging.info(f"Deleted file:{file_path}") - else: - logging.warning(f"File not found for deletion: {file_path}") - except Exception as e: - logging.error(f"Failed to delete file {file_path}. Error: {e}") def test_graph_from_file_local(model_name): - """Test graph creation from a local file.""" - try: - file_name = 'About Amazon.pdf' - shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf',os.path.join(MERGED_DIR, file_name)) - create_source_node_local(graph, model_name, file_name) - merged_file_path = os.path.join(MERGED_DIR, file_name) - local_file_result = asyncio.run(extract_graph_from_file_local_file(URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '',None)) - logging.info("Local file processing complete") - print(local_file_result) - return local_file_result - except Exception as e: - logging.error(f"Failed to delete file. 
Error: {e}") - -# try: -# assert local_file_result['status'] == 'Completed' -# assert local_file_result['nodeCount'] > 0 -# assert local_file_result['relationshipCount'] > 0 -# print("Success") -# except AssertionError as e: -# print("Fail: ", e) - - # Delete the file after processing -# delete_extracted_fiKles(merged_file_path) - - #return local_file_result + """Tests graph creation from a local file.""" + try: + file_name = 'About Amazon.pdf' + merged_file_path = os.path.join(MERGED_DIR, file_name) + shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf', merged_file_path) + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + create_source_node_local(graph, model_name, file_name) + result = asyncio.run( + extract_graph_from_file_local_file( + URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '', None, '' + ) + ) + logging.info(f"Local file test result: {result}") + return result + except Exception as e: + logging.error(f"Error in test_graph_from_file_local: {e}") + return {"status": "Failed", "error": str(e)} def test_graph_from_wikipedia(model_name): - try: - """Test graph creation from a Wikipedia page.""" + """Tests graph creation from a Wikipedia page.""" + try: wiki_query = 'https://en.wikipedia.org/wiki/Apollo_program' - source_type = 'Wikipedia' - file_name = "Apollo_program" - create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) - - wiki_result = asyncio.run(extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, wiki_query, 'en',file_name, '', '',None)) - logging.info("Wikipedia test done") - print(wiki_result) - # try: - # assert wiki_result['status'] == 'Completed' - # assert wiki_result['nodeCount'] > 0 - # assert wiki_result['relationshipCount'] > 0 - # print("Success") - # except AssertionError as e: - # print("Fail: ", e) - - return wiki_result - except Exception as ex: - print('Hello error herte') - print(ex) - -def test_graph_website(model_name): - """Test graph creation from a Website page.""" - #graph, model, source_url, source_type - source_url = 'https://www.cloudskillsboost.google/' - source_type = 'web-url' - file_name = 'Google Cloud Skills Boost' - # file_name = [] - create_source_node_graph_web_url(graph, model_name, source_url, source_type) - - weburl_result = asyncio.run(extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url,file_name, '', '',None)) - logging.info("WebUrl test done") - print(weburl_result) - - # try: - # assert weburl_result['status'] == 'Completed' - # assert weburl_result['nodeCount'] > 0 - # assert weburl_result['relationshipCount'] > 0 - # print("Success") - # except AssertionError as e: - # print("Fail: ", e) - return weburl_result - + file_name = 'Apollo_program' + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, "Wikipedia") + result = asyncio.run( + extract_graph_from_file_Wikipedia( + URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 'en', file_name, '', '', None, '' + ) + ) + logging.info(f"Wikipedia test result: {result}") + return result + except Exception as e: + logging.error(f"Error in test_graph_from_wikipedia: {e}") + return {"status": "Failed", "error": str(e)} + def test_graph_from_youtube_video(model_name): - """Test graph creation from a YouTube video.""" - source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' - file_name = 'NKc8Tr5_L3w' - 
source_type = 'youtube' - create_source_node_graph_url_youtube(graph, model_name, source_url, source_type) - youtube_result = asyncio.run(extract_graph_from_file_youtube(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url,file_name,'','',None)) - logging.info("YouTube Video test done") - print(youtube_result) - -# try: -# assert youtube_result['status'] == 'Completed' -# assert youtube_result['nodeCount'] > 1 -# assert youtube_result['relationshipCount'] > 1 -# print("Success") -# except AssertionError as e: -# print("Failed: ", e) - - return youtube_result + """Tests graph creation from a YouTube video.""" + try: + source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' + file_name = 'NKc8Tr5_L3w' + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + create_source_node_graph_url_youtube(graph, model_name, source_url, "youtube") + result = asyncio.run( + extract_graph_from_file_youtube( + URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, file_name, '', '', None, '' + ) + ) + logging.info(f"YouTube video test result: {result}") + if isinstance(result, dict) and result.get("status") == "Failed": + return {"status": "Failed", "error": result.get("error", "Unknown error")} + return result + except Exception as e: + logging.error(f"Error in test_graph_from_youtube_video: {e}") + return {"status": "Failed", "error": str(e)} +def test_graph_website(model_name): + """Tests graph creation from a Website page.""" + try: + source_url = 'https://www.cloudskillsboost.google/' + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + create_source_node_graph_web_url(graph, model_name, source_url, "web-url") + result = asyncio.run( + extract_graph_from_web_page( + URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, "Google Cloud Skills Boost", '', '', None, '' + ) + ) + logging.info(f"Web URL test result: {result}") + if isinstance(result, dict) and result.get("status") == "Failed": + return {"status": "Failed", "error": result.get("error", "Unknown error")} + return result + except Exception as e: + logging.error(f"Error in test_graph_website: {e}") + return {"status": "Failed", "error": str(e)} + def test_chatbot_qna(model_name, mode='vector'): - """Test chatbot QnA functionality for different modes.""" - QA_n_RAG = QA_RAG(graph, model_name, 'Tell me about amazon', '[]', 1, mode) - print(QA_n_RAG) - print(len(QA_n_RAG['message'])) + """Tests chatbot QnA functionality.""" + try: + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + result = QA_RAG(graph, model_name, 'Tell me about Amazon', '[]', 1, mode) + # assert len(result['message']) > 20 + logging.info(f"Chatbot QnA test passed for mode: {mode}") + return result + except Exception as e: + logging.error(f"Error in chatbot QnA: {e}") + return {"status": "Failed", "error": str(e)} +def get_disconnected_nodes(): + """Fetches list of disconnected nodes.""" + try: + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + graphDb_data_Access = graphDBdataAccess(graph) + nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() + if not nodes_list: + return None,"No records found" + return nodes_list[0]["e"]["elementId"], "Records loaded successfully" if total_nodes['total'] > 0 else "No records found" + except Exception as e: + logging.error(f"Error in get_disconnected_nodes: {e}") + return None, "Error fetching nodes" +def delete_disconnected_nodes(lst_element_id): + """Deletes disconnected nodes from the graph.""" try: - 
assert len(QA_n_RAG['message']) > 20 - return QA_n_RAG - print("Success") - except AssertionError as e: - print("Failed ", e) - return QA_n_RAG - -#Get Test disconnected_nodes list -def disconected_nodes(): - #graph = create_graph_database_connection(uri, userName, password, database) - graphDb_data_Access = graphDBdataAccess(graph) - nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() - print(nodes_list[0]["e"]["elementId"]) - status = "False" - if total_nodes['total']>0: - status = "get_unconnected_nodes_list.. records loaded successfully" - else: - status = "get_unconnected_nodes_list ..records not loaded" - return nodes_list[0]["e"]["elementId"], status - -#Test Delete delete_disconnected_nodes list -def delete_disconected_nodes(lst_element_id): - print(f'disconnect elementid list {lst_element_id}') - #graph = create_graph_database_connection(uri, userName, password, database) - graphDb_data_Access = graphDBdataAccess(graph) - result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) - print(f'delete disconnect api result {result}') - if not result: - return "delete_unconnected_nodes..Succesfully deleted first index of disconnected nodes" - else: - return "delete_unconnected_nodes..Unable to delete Nodes" + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + graphDb_data_Access = graphDBdataAccess(graph) + result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) + return "Successfully deleted disconnected nodes" if not result else "Failed to delete nodes" + except Exception as e: + logging.error(f"Error in delete_disconnected_nodes: {e}") + return "Error in deletion" -#Test Get Duplicate_nodes +def test_populate_graph_schema_from_text(model_name): + """Tests schema population from text.""" + try: + schema_text = "Amazon was founded on July 5, 1994, by Jeff Bezos in Bellevue, Washington." + result_schema = populate_graph_schema_from_text(schema_text, model_name, True) + logging.info(f"Schema test result: {result_schema}") + return result_schema + except Exception as e: + logging.error(f"Error in populate_graph_schema_from_text: {e}") + return {"status": "Failed", "error": str(e)} + def get_duplicate_nodes(): #graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) @@ -195,66 +170,95 @@ def get_duplicate_nodes(): return "Data successfully loaded" else: return "Unable to load data" - -#Test populate_graph_schema -def test_populate_graph_schema_from_text(model_name): - schema_text =('Amazon was founded on July 5, 1994, by Jeff Bezos in Bellevue, Washington.The company originally started as an online marketplace for books but gradually expanded its offerings to include a wide range of product categories. 
This diversification led to it being referred.')
-    #result_schema=''
-    try:
-        result_schema = populate_graph_schema_from_text(schema_text, model_name, True)
-        print(result_schema)
-        return result_schema
-    except Exception as e:
-        print("Failed to get schema from text", e)
-        return e
-
+
 def run_tests():
-    final_list = []
-    error_list = []
-
-    models = ['openai_gpt_4','openai_gpt_4o','openai_gpt_4o_mini','gemini_1.5_pro','gemini_1.5_flash']
-
+    """Runs all integration tests and logs results."""
+    extract_list = []
+    extract_error_list = []
+    chatbot_list = []
+    chatbot_error_list = []
+    other_api_list = []
+    models = ['openai_gpt_4','openai_gpt_4o','openai_gpt_4o_mini','gemini_1.5_pro','gemini_1.5_flash','gemini_2.0_flash','bedrock_nova_micro_v1','bedrock_nova_lite_v1','bedrock_nova_pro_v1','fireworks_qwen72b_instruct']
+    chatbot_modes = [
+        "vector",
+        "graph+vector",
+        "fulltext",
+        "graph+vector+fulltext",
+        "entity search+vector"
+    ]
     for model_name in models:
+        logging.info(f"Starting tests for model: {model_name}")
+        # Run each test independently to capture all errors
+        for test_func, test_args in [
+            (test_graph_from_file_local, [model_name]),
+            (test_graph_from_wikipedia, [model_name]),
+            (test_graph_from_youtube_video,[model_name]),
+            (test_graph_website,[model_name]),
+        ]:
+            try:
+                result = test_func(*test_args)
+                if isinstance(result, dict) and result.get("status") == "Failed":
+                    extract_error_list.append((model_name, test_func.__name__, result.get("error", "Unknown error")))
+                else:
+                    extract_list.append(result)
+            except Exception as e:
+                logging.error(f"Error in {test_func.__name__} for {model_name}: {e}")
+                extract_error_list.append((model_name, test_func.__name__, str(e)))
+        # Run all chatbot QnA modes
+        for mode in chatbot_modes:
+            try:
+                result = test_chatbot_qna(model_name,mode=mode)
+                if isinstance(result, dict) and result.get("status") == "Failed":
+                    chatbot_error_list.append((model_name, f"test_chatbot_qna ({mode})", result.get("error", "Unknown error")))
+                else:
+                    chatbot_list.append(result)
+            except Exception as e:
+                logging.error(f"Error in test_chatbot_qna ({mode}) for {model_name}: {e}")
+                chatbot_error_list.append((model_name, f"test_chatbot_qna ({mode})", str(e)))
         try:
-            final_list.append(test_graph_from_file_local(model_name))
-            final_list.append(test_graph_from_wikipedia(model_name))
-            final_list.append(test_graph_website(model_name))
-            final_list.append(test_populate_graph_schema_from_text(model_name))
-            final_list.append(test_graph_from_youtube_video(model_name))
-            final_list.append(test_chatbot_qna(model_name))
-            final_list.append(test_chatbot_qna(model_name, mode='vector'))
-            final_list.append(test_chatbot_qna(model_name, mode='graph+vector'))
-            final_list.append(test_chatbot_qna(model_name, mode='fulltext'))
-            final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext'))
-            final_list.append(test_chatbot_qna(model_name, mode='entity search+vector'))
-
+            schema_result = test_populate_graph_schema_from_text(model_name)
+            logging.info(f"Schema result for {model_name}: {schema_result}")
+            other_api_list.append({f"{model_name}":schema_result})
         except Exception as e:
-            error_list.append((model_name, str(e)))
+            logging.error(f"Error in test_populate_graph_schema_from_text for {model_name}: {e}")
+            other_api_list.append({f"{model_name}":str(e)})
+    # Handle disconnected nodes separately
+    try:
+        dis_elementid, dis_status = get_disconnected_nodes()
+        delete_status = delete_disconnected_nodes([dis_elementid]) if dis_elementid else "No disconnected nodes found"
disconnected nodes found" + except Exception as e: + dis_status, delete_status = "Error fetching nodes", "Error deleting nodes" + logging.error(f"Error handling disconnected nodes: {e}") -# test_populate_graph_schema_from_text('openai-gpt-4o') -#delete diconnected nodes - dis_elementid, dis_status = disconected_nodes() - lst_element_id = [dis_elementid] - delt = delete_disconected_nodes(lst_element_id) - dup = get_duplicate_nodes() - print(final_list) - schma = test_populate_graph_schema_from_text(model_name) - # Save final results to CSV - df = pd.DataFrame(final_list) - print(df) - df['execution_date'] = dt.today().strftime('%Y-%m-%d') -#diconnected nodes - df['disconnected_nodes']=dis_status - df['get_duplicate_nodes']=dup + try: + dup = get_duplicate_nodes() + except Exception as e: + dup = "Error getting duplicate nodes" + logging.error(f"Error getting duplicate nodes: {e}") + # Convert results to DataFrame + df_extract = pd.DataFrame(extract_list) + df_extract['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_extract.to_csv(f"test_results/Extract_Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) - df['delete_disconected_nodes']=delt - df['test_populate_graph_schema_from_text'] = schma - df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + df_chatbot = pd.DataFrame(chatbot_list) + df_chatbot['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_chatbot.to_csv(f"test_results/chatbot_Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) - # Save error details to CSV - df_errors = pd.DataFrame(error_list, columns=['Model', 'Error']) - df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') - df_errors.to_csv(f"Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + other_api_dict = {'disconnected_nodes':dis_status,'delete_disconnected_nodes' : delete_status,'get_duplicate_nodes':dup,'test_populate_graph_schema_from_text':other_api_list} + with open(f"test_results/other_api_results_{dt.now().strftime('%Y%m%d_%H%M%S')}.txt", "w") as file: + file.write(json.dumps(other_api_dict, indent=4)) + # Save errors + if extract_error_list: + df_errors = pd.DataFrame(extract_error_list, columns=['Model', 'Function', 'Error']) + df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_errors.to_csv(f"test_results/Extract_Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + if chatbot_error_list: + df_errors = pd.DataFrame(chatbot_error_list, columns=['Model', 'Function', 'Error']) + df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_errors.to_csv(f"test_results/chatbot_Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + logging.info("All tests completed.") if __name__ == "__main__": - run_tests() \ No newline at end of file + run_tests() diff --git a/docker-compose.yml b/docker-compose.yml index 6f1b4174e..d3fe710f1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -65,7 +65,7 @@ services: - VITE_LLM_MODELS_PROD=${VITE_LLM_MODELS_PROD-openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash} - VITE_AUTH0_DOMAIN=${VITE_AUTH0_DOMAIN-} - VITE_AUTH0_CLIENT_ID=${VITE_AUTH0_CLIENT_ID-} - - VITE_SKIP_AUTH=${VITE_SKIP_AUTH-true} + - VITE_SKIP_AUTH=$VITE_SKIP_AUTH-true} - DEPLOYMENT_ENV=local volumes: - ./frontend:/app diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index 058f88a58..d82f12870 100644 --- a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -1075,3 
diff --git a/example.env b/example.env
index 5d3a598c9..71c9dd36b 100644
--- a/example.env
+++ b/example.env
@@ -27,6 +27,9 @@ VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web"
 VITE_ENV="DEV"
 VITE_TIME_PER_PAGE=50
 VITE_CHUNK_SIZE=5242880
+VITE_CHUNK_OVERLAP=20
+VITE_TOKENS_PER_CHUNK=100
+VITE_CHUNK_TO_COMBINE=1
 VITE_GOOGLE_CLIENT_ID=""
 VITE_CHAT_MODES=""
 VITE_BATCH_SIZE=2
diff --git a/frontend/example.env b/frontend/example.env
index f96efd207..1576bbea0 100644
--- a/frontend/example.env
+++ b/frontend/example.env
@@ -5,6 +5,9 @@ VITE_LLM_MODELS="diffbot,openai_gpt_3.5,openai_gpt_4o"
 VITE_ENV="DEV"
 VITE_TIME_PER_PAGE=50
 VITE_CHUNK_SIZE=5242880
+VITE_CHUNK_OVERLAP=20
+VITE_TOKENS_PER_CHUNK=100
+VITE_CHUNK_TO_COMBINE=1
 VITE_LARGE_FILE_SIZE=5242880
 VITE_GOOGLE_CLIENT_ID=""
 VITE_CHAT_MODES=""
diff --git a/frontend/package.json b/frontend/package.json
index baedaee34..cc23f21f6 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -16,8 +16,8 @@
     "@mui/material": "^5.15.10",
     "@mui/styled-engine": "^5.15.9",
     "@neo4j-devtools/word-color": "^0.0.8",
-    "@neo4j-ndl/base": "^3.0.16",
-    "@neo4j-ndl/react": "^3.0.30",
+    "@neo4j-ndl/base": "^3.2.9",
+    "@neo4j-ndl/react": "^3.2.18",
     "@neo4j-nvl/base": "^0.3.6",
     "@neo4j-nvl/react": "^0.3.6",
     "@react-oauth/google": "^0.12.1",
@@ -34,8 +34,6 @@
     "react-markdown": "^9.0.1",
     "react-router": "^6.23.1",
     "react-router-dom": "^6.23.1",
-    "remark-gfm": "^4.0.0",
-    "tailwind-merge": "^2.3.0",
     "uuid": "^9.0.1"
   },
   "devDependencies": {
diff --git a/frontend/src/API/Index.ts b/frontend/src/API/Index.ts
index b89924221..d3144379e 100644
--- a/frontend/src/API/Index.ts
+++ b/frontend/src/API/Index.ts
@@ -9,11 +9,11 @@ const api = axios.create({
 
 export const createDefaultFormData = (userCredentials: UserCredentials) => {
   const formData = new FormData();
-  formData.append('uri', userCredentials?.uri ?? '');
-  formData.append('database', userCredentials?.database ?? '');
-  formData.append('userName', userCredentials?.userName ?? '');
-  formData.append('password', userCredentials?.password ?? '');
-  formData.append('email', userCredentials?.email ?? 
''); + if (userCredentials?.uri) formData.append('uri', userCredentials?.uri); + if (userCredentials?.database) formData.append('database', userCredentials?.database); + if (userCredentials?.userName) formData.append('userName', userCredentials?.userName); + if (userCredentials?.password) formData.append('password', userCredentials?.password); + if (userCredentials?.email) formData.append('email', userCredentials?.email); api.interceptors.request.use( (config) => { if (config.data instanceof FormData) { diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 6a7ce0948..f975e5c52 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -8,6 +8,7 @@ const App = () => { return ( : }> + }> }> ); diff --git a/frontend/src/components/Auth/Auth.tsx b/frontend/src/components/Auth/Auth.tsx index a9fdc568b..30849d4da 100644 --- a/frontend/src/components/Auth/Auth.tsx +++ b/frontend/src/components/Auth/Auth.tsx @@ -7,6 +7,7 @@ const Auth0ProviderWithHistory: React.FC<{ children: React.ReactNode }> = ({ chi const navigate = useNavigate(); function onRedirectCallback(appState?: AppState) { + localStorage.removeItem('isReadOnlyMode'); navigate(appState?.returnTo || window.location.pathname, { state: appState }); } diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 779b591f9..d14c48be6 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -18,7 +18,6 @@ import { ExtendedNode, chatInfoMessage } from '../../types'; import { useEffect, useMemo, useReducer, useRef, useState } from 'react'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; -import { useCredentials } from '../../context/UserCredentials'; import { tokens } from '@neo4j-ndl/base'; import ChunkInfo from './ChunkInfo'; import EntitiesInfo from './EntitiesInfo'; @@ -82,12 +81,11 @@ const ChatInfoModal: React.FC = ({ error?.length ? 10 : mode === chatModeLables['global search+vector+fulltext'] - ? 7 - : mode === chatModeLables.graph - ? 4 - : 3 + ? 7 + : mode === chatModeLables.graph + ? 4 + : 3 ); - const { userCredentials } = useCredentials(); const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); @@ -99,15 +97,15 @@ const ChatInfoModal: React.FC = ({ multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length > 4 ? true : multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length <= 4 - ? false - : null + ? false + : null ); const [isAdditionalMetricsWithSingleMode, setIsAdditionalMetricsWithSingleMode] = useState( metricDetails != undefined && Object.keys(metricDetails).length > 3 ? true : metricDetails != undefined && Object.keys(metricDetails).length <= 3 - ? false - : null + ? false + : null ); const actions: React.ComponentProps>[] = useMemo( @@ -142,10 +140,9 @@ const ChatInfoModal: React.FC = ({ toggleInfoLoading(); try { const response = await chunkEntitiesAPI( - userCredentials?.database, nodeDetails, entities_ids, - mode + mode, ); if (response.data.status === 'Failure') { throw new Error(response.data.error); @@ -355,9 +352,9 @@ const ChatInfoModal: React.FC = ({ {mode != chatModeLables.graph ? Sources used : <>} {mode != chatModeLables.graph ? 
Chunks : <>} {mode === chatModeLables['graph+vector'] || - mode === chatModeLables.graph || - mode === chatModeLables['graph+vector+fulltext'] || - mode === chatModeLables['entity search+vector'] ? ( + mode === chatModeLables.graph || + mode === chatModeLables['graph+vector+fulltext'] || + mode === chatModeLables['entity search+vector'] ? ( Top Entities used ) : ( <> diff --git a/frontend/src/components/ChatBot/ChatOnlyComponent.tsx b/frontend/src/components/ChatBot/ChatOnlyComponent.tsx index 7a3236a97..33ee19bd9 100644 --- a/frontend/src/components/ChatBot/ChatOnlyComponent.tsx +++ b/frontend/src/components/ChatBot/ChatOnlyComponent.tsx @@ -34,9 +34,17 @@ const ChatContent: React.FC = ({ chatMessages }) => { const port = urlParams.get('port'); const email = urlParams.get('email'); const openModal = urlParams.get('open') === 'true'; + const connectionStatus = urlParams.get('connectionStatus') === 'true'; if (openModal || !(uri && user && encodedPassword && database && port)) { - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - } else { + if (connectionStatus) { + setShowBackButton(); + setConnectionStatus(connectionStatus); + setMessages(chatMessages); + } else { + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + } + } + else { const credentialsForAPI: UserCredentials = { uri, userName: user, @@ -75,9 +83,9 @@ const ChatContent: React.FC = ({ chatMessages }) => { try { setClearHistoryData(true); setIsDeleteChatLoading(true); - const credentials = JSON.parse(localStorage.getItem('neo4j.connection') || '{}') as UserCredentials; + // const credentials = JSON.parse(localStorage.getItem('neo4j.connection') || '{}') as UserCredentials; const sessionId = sessionStorage.getItem('session_id') || ''; - const response = await clearChatAPI(credentials, sessionId); + const response = await clearChatAPI(sessionId); setIsDeleteChatLoading(false); if (response.data.status !== 'Success') { setClearHistoryData(false); diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 9d5f348f6..93c9e42ab 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -17,6 +17,9 @@ import { llms, RETRY_OPIONS, tooltips, + tokenchunkSize, + chunkOverlap, + chunksToCombine, } from '../utils/Constants'; import ButtonWithToolTip from './UI/ButtonWithToolTip'; import DropdownComponent from './Dropdown'; @@ -37,6 +40,7 @@ import { getChunkText } from '../services/getChunkText'; import ChunkPopUp from './Popups/ChunkPopUp'; import { isExpired, isFileReadyToProcess } from '../utils/Utils'; import { useHasSelections } from '../hooks/useHasSelections'; +import { Hierarchy1Icon } from '@neo4j-ndl/react/icons'; const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -81,9 +85,15 @@ const Content: React.FC = ({ setModel, selectedNodes, selectedRels, + selectedTokenChunkSize, + selectedChunk_overlap, + selectedChunks_to_combine, setSelectedNodes, setRowSelection, setSelectedRels, + setSelectedTokenChunkSize, + setSelectedChunk_overlap, + setSelectedChunks_to_combine, postProcessingTasks, queue, processedCount, @@ -93,7 +103,7 @@ const Content: React.FC = ({ additionalInstructions, setAdditionalInstructions, } = useFileContext(); - const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView' | 'neighborView'>( + const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView' | 'neighborView'|'showSchemaView'>( 'tableView' ); const 
[showDeletePopUp, setShowDeletePopUp] = useState(false); @@ -143,10 +153,10 @@ const Content: React.FC = ({ ? postProcessingTasks.filter((task) => task !== 'graph_schema_consolidation') : postProcessingTasks : hasSelections - ? postProcessingTasks.filter( + ? postProcessingTasks.filter( (task) => task !== 'graph_schema_consolidation' && task !== 'enable_communities' ) - : postProcessingTasks.filter((task) => task !== 'enable_communities'); + : postProcessingTasks.filter((task) => task !== 'enable_communities'); if (payload.length) { const response = await postProcessing(payload); if (response.data.status === 'Success') { @@ -263,10 +273,7 @@ const Content: React.FC = ({ const { name } = fileItem; triggerStatusUpdateAPI( name as string, - userCredentials?.uri, - userCredentials?.userName, - userCredentials?.password, - userCredentials?.database, + userCredentials, updateStatusForLargeFiles ); } @@ -283,6 +290,9 @@ const Content: React.FC = ({ fileItem.gcsBucketFolder ?? '', selectedNodes.map((l) => l.value), selectedRels.map((t) => t.value), + selectedTokenChunkSize, + selectedChunk_overlap, + selectedChunks_to_combine, fileItem.googleProjectId, fileItem.language, fileItem.accessToken, @@ -531,13 +541,15 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; - const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' - }`; - const encodedURL = encodeURIComponent(connectURL); - const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); - window.open(replacedUrl, '_blank'); - }; + let finalUrl = bloomUrl; + if (userCredentials?.database && userCredentials.uri && userCredentials.userName) { + const uriCoded = userCredentials.uri.replace(/:\d+$/, ''); + const connectURL = `${uriCoded.split('//')[0]}//${userCredentials.userName}@${uriCoded.split('//')[1]}:${userCredentials.port ?? '7687'}`; + const encodedURL = encodeURIComponent(connectURL); + finalUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); + } + window.open(finalUrl, '_blank'); + }; const handleGraphView = () => { setOpenGraphView(true); @@ -554,6 +566,12 @@ const Content: React.FC = ({ setUserCredentials({ uri: '', password: '', userName: '', database: '', email: '' }); setSelectedNodes([]); setSelectedRels([]); + localStorage.removeItem('selectedTokenChunkSize'); + setSelectedTokenChunkSize(tokenchunkSize); + localStorage.removeItem('selectedChunk_overlap'); + setSelectedChunk_overlap(chunkOverlap); + localStorage.removeItem('selectedChunks_to_combine'); + setSelectedChunks_to_combine(chunksToCombine); localStorage.removeItem('instructions'); setAdditionalInstructions(''); setMessages([ @@ -593,12 +611,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? { - ...f, - status: 'Ready to Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - nodesCount: isStartFromBegining ? 0 : f.nodesCount, - relationshipsCount: isStartFromBegining ? 0 : f.relationshipsCount, - } + ...f, + status: 'Ready to Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + nodesCount: isStartFromBegining ? 0 : f.nodesCount, + relationshipsCount: isStartFromBegining ? 
0 : f.relationshipsCount, + } : f; }); }); @@ -702,7 +720,8 @@ const Content: React.FC = ({ const selectedRows = childRef.current?.getSelectedRows(); if (selectedRows?.length) { const expiredFilesExists = selectedRows.some( - (c) => isFileReadyToProcess(c, true) && isExpired((c?.createdAt as Date) ?? new Date())); + (c) => isFileReadyToProcess(c, true) && isExpired((c?.createdAt as Date) ?? new Date()) + ); const largeFileExists = selectedRows.some( (c) => isFileReadyToProcess(c, true) && typeof c.size === 'number' && c.size > largeFileSize ); @@ -755,6 +774,11 @@ const Content: React.FC = ({ }); }, []); + const handleSchemaView = () => { + setOpenGraphView(true); + setViewPoint('showSchemaView'); + }; + return ( <> = ({
{!hasSelections ? : }
@@ -977,6 +1001,15 @@ const Content: React.FC = ({ > {buttonCaptions.showPreviewGraph} {selectedfileslength && completedfileNo ? `(${completedfileNo})` : ''} + + + = (props, re const { connectionStatus, setConnectionStatus, onInspect, onRetry, onChunkView } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } = useFileContext(); - const { userCredentials, isReadOnlyUser, chunksToBeProces } = useCredentials(); + const { userCredentials, isReadOnlyUser } = useCredentials(); const columnHelper = createColumnHelper(); const [columnFilters, setColumnFilters] = useState([]); const [isLoading, setIsLoading] = useState(false); @@ -86,7 +89,7 @@ const FileTable: ForwardRefRenderFunction = (props, re const { colorMode } = useContext(ThemeWrapperContext); const [copyRow, setCopyRow] = useState(false); const islargeDesktop = useMediaQuery(`(min-width:1440px )`); - + const { pathname } = useLocation(); const tableRef = useRef(null); const { updateStatusForLargeFiles } = useServerSideEvent( @@ -682,10 +685,7 @@ const FileTable: ForwardRefRenderFunction = (props, re const handleSmallFile = (item: SourceNode, userCredentials: UserCredentials) => { subscribe( item.fileName, - userCredentials?.uri, - userCredentials?.userName, - userCredentials?.database, - userCredentials?.password, + userCredentials, updatestatus, updateProgress ).catch(handleFileUploadError); @@ -694,10 +694,7 @@ const FileTable: ForwardRefRenderFunction = (props, re const handleLargeFile = (item: SourceNode, userCredentials: UserCredentials) => { triggerStatusUpdateAPI( item.fileName, - userCredentials.uri, - userCredentials.userName, - userCredentials.password, - userCredentials.database, + userCredentials, updateStatusForLargeFiles ); }; @@ -999,6 +996,13 @@ const FileTable: ForwardRefRenderFunction = (props, re <> {filesData ? ( <> + {filesData.length === 0 && pathname === '/readonly' && ( + + + + + )} = (props, re - {`Large files may be partially processed up to ${chunksToBeProces} chunks due to resource limits.`} + {`Large files may be partially processed up to 10K characters due to resource limits.`} diff --git a/frontend/src/components/Graph/GraphPropertiesTable.tsx b/frontend/src/components/Graph/GraphPropertiesTable.tsx index fcabb0103..b4508b283 100644 --- a/frontend/src/components/Graph/GraphPropertiesTable.tsx +++ b/frontend/src/components/Graph/GraphPropertiesTable.tsx @@ -2,6 +2,7 @@ import { GraphLabel, Typography } from '@neo4j-ndl/react'; import { GraphPropertiesTableProps } from '../../types'; const GraphPropertiesTable = ({ propertiesWithTypes }: GraphPropertiesTableProps): JSX.Element => { return (
@@ -10,24 +11,26 @@ const GraphPropertiesTable = ({ propertiesWithTypes }: GraphPropertiesTableProps Value
- {propertiesWithTypes.map(({ key, value }, _) => { - return ( -
-
- - {key} - + {propertiesWithTypes + .filter(({ value }) => value !== undefined && value !== null && value !== '' && !Array.isArray(value)) + .map(({ key, value }, _) => { + return ( +
+
+ + {key} + +
+
{value}
-
{value}
-
- ); - })} + ); + })}
); }; diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 3ea256be7..05d1f1897 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -24,7 +24,7 @@ import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { filterData, getCheckboxConditions, graphTypeFromNodes, processGraphData } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; -import { graphQueryAPI } from '../../services/GraphQuery'; +import { getGraphSchema, graphQueryAPI } from '../../services/GraphQuery'; import { graphLabels, nvlOptions, queryMap } from '../../utils/Constants'; import CheckboxSelection from './CheckboxSelection'; @@ -110,13 +110,17 @@ const GraphViewModal: React.FunctionComponent = ({ const fetchData = useCallback(async () => { try { - const nodeRelationshipData = - viewPoint === graphLabels.showGraphView - ? await graphQueryAPI( - graphQuery, - selectedRows?.map((f) => f.name) - ) - : await graphQueryAPI(graphQuery, [inspectedName ?? '']); + let nodeRelationshipData; + if (viewPoint === graphLabels.showGraphView) { + nodeRelationshipData = await graphQueryAPI( + graphQuery, + selectedRows?.map((f) => f.name) + ); + } else if (viewPoint === graphLabels.showSchemaView) { + nodeRelationshipData = await getGraphSchema(); + } else { + nodeRelationshipData = await graphQueryAPI(graphQuery, [inspectedName ?? '']); + } return nodeRelationshipData; } catch (error: any) { console.log(error); @@ -254,6 +258,8 @@ const GraphViewModal: React.FunctionComponent = ({ const headerTitle = viewPoint === graphLabels.showGraphView || viewPoint === graphLabels.chatInfoView ? graphLabels.generateGraph + : viewPoint === graphLabels.showSchemaView + ? graphLabels.renderSchemaGraph : `${graphLabels.inspectGeneratedGraphFrom} ${inspectedName}`; const checkBoxView = viewPoint !== graphLabels.chatInfoView; @@ -349,14 +355,15 @@ const GraphViewModal: React.FunctionComponent = ({ > {headerTitle} - {viewPoint !== graphLabels.chatInfoView && ( -
- - - - {graphLabels.chunksInfo} -
- )} + {viewPoint !== graphLabels.chatInfoView && + (viewPoint !== graphLabels.showSchemaView && ( +
+ + + + {graphLabels.chunksInfo} +
+ ))} {checkBoxView && ( = ({ isExpanded, clearHistoryD const location = useLocation(); useEffect(() => { - if (location && location.state && Array.isArray(location.state)) { - setMessages(location.state); - } else if (location && location.state && Object.prototype.toString.call(location.state) === '[object Object]') { - setUserCredentials(location.state.credential); - setIsGCSActive(location.state.isGCSActive); - setGdsActive(location.state.isgdsActive); - setIsReadOnlyUser(location.state.isReadOnlyUser); + // const localStorageData = localStorage.getItem('neo4j.connection'); + // const connectionLocal = JSON.parse(localStorageData ?? ''); + // if (connectionStatus && (connectionLocal.uri === userCredentials?.uri)) { + if (connectionStatus) { + if (location && location.state && Array.isArray(location.state)) { + setMessages(location.state); + } else if ( + location && + location.state && + typeof location.state === 'object' && + Object.keys(location.state).length > 1 + ) { + setUserCredentials(location.state.credential); + setIsGCSActive(location.state.isGCSActive); + setGdsActive(location.state.isgdsActive); + setIsReadOnlyUser(location.state.isReadOnlyUser); + } } - }, [location]); + }, [location, connectionStatus]); const getIsLoading = (messages: Messages[]) => { return messages.length > 1 ? messages.some((msg) => msg.isTyping || msg.isLoading) : false; diff --git a/frontend/src/components/Layout/DrawerDropzone.tsx b/frontend/src/components/Layout/DrawerDropzone.tsx index 7396ff786..a00af742f 100644 --- a/frontend/src/components/Layout/DrawerDropzone.tsx +++ b/frontend/src/components/Layout/DrawerDropzone.tsx @@ -72,7 +72,7 @@ const DrawerDropzone: React.FC = ({ )} {APP_SOURCES.includes('local') && ( -
+
)} diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index a57571612..a622e43ba 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -47,21 +47,21 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti const isLoading = getIsLoading(messages); if (session) { const neo4jConnection = JSON.parse(session); - const { uri } = neo4jConnection; - const userName = neo4jConnection.user; - const { password } = neo4jConnection; - const { database } = neo4jConnection; + const { uri, userName, password, database } = neo4jConnection; const [, port] = uri.split(':'); const encodedPassword = btoa(password); const chatUrl = `/chat-only?uri=${encodeURIComponent( uri )}&user=${userName}&password=${encodedPassword}&database=${database}&port=${port}&connectionStatus=${connectionStatus}`; navigate(chatUrl, { state: { messages, isLoading } }); + } else if (connectionStatus) { + const chatUrl = `/chat-only?connectionStatus=${connectionStatus}`; + navigate(chatUrl, { state: { messages, isLoading } }); } else { const chatUrl = `/chat-only?openModal=true`; window.open(chatUrl, '_blank'); } - }, [messages]); + }, [messages, connectionStatus, navigate]); const onBackButtonClick = () => { navigate('/', { state: messages }); diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index f92ddc77c..0978a51b5 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -1,11 +1,11 @@ -import { lazy, Suspense, useEffect, useReducer, useState } from 'react'; +import { lazy, Suspense, useEffect, useMemo, useReducer, useState } from 'react'; import SideNav from './SideNav'; import DrawerDropzone from './DrawerDropzone'; import DrawerChatbot from './DrawerChatbot'; import Content from '../Content'; import { clearChatAPI } from '../../services/QnaAPI'; import { useCredentials } from '../../context/UserCredentials'; -import { connectionState, UserCredentials } from '../../types'; +import { connectionState } from '../../types'; import { useMessageContext } from '../../context/UserMessages'; import { useMediaQuery } from '@mui/material'; import { useFileContext } from '../../context/UsersFiles'; @@ -16,8 +16,12 @@ import { envConnectionAPI } from '../../services/ConnectAPI'; import { healthStatus } from '../../services/HealthStatus'; import { useNavigate } from 'react-router'; import { useAuth0 } from '@auth0/auth0-react'; +import { showErrorToast } from '../../utils/toasts'; +import { APP_SOURCES } from '../../utils/Constants'; import { createDefaultFormData } from '../../API/Index'; - +const GCSModal = lazy(() => import('../DataSources/GCS/GCSModal')); +const S3Modal = lazy(() => import('../DataSources/AWS/S3Modal')); +const GenericModal = lazy(() => import('../WebSources/GenericSourceModal')); const ConnectionModal = lazy(() => import('../Popups/ConnectionModal/ConnectionModal')); const PageLayout: React.FC = () => { @@ -28,14 +32,28 @@ const PageLayout: React.FC = () => { chunksExistsWithDifferentDimension: false, }); const isLargeDesktop = useMediaQuery(`(min-width:1440px )`); - const { userCredentials, connectionStatus, setIsReadOnlyUser } = useCredentials(); + const { + connectionStatus, + setIsReadOnlyUser, + setConnectionStatus, + setGdsActive, + setIsBackendConnected, + setUserCredentials, + setErrorMessage, + setShowDisconnectButton, + showDisconnectButton, + setIsGCSActive, + // setChunksToBeProces, + 
} = useCredentials(); const [isLeftExpanded, setIsLeftExpanded] = useState(Boolean(isLargeDesktop)); const [isRightExpanded, setIsRightExpanded] = useState(Boolean(isLargeDesktop)); const [showChatBot, setShowChatBot] = useState(false); const [showDrawerChatbot, setShowDrawerChatbot] = useState(true); const [showEnhancementDialog, toggleEnhancementDialog] = useReducer((s) => !s, false); const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); - const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); + const [showGCSModal, toggleGCSModal] = useReducer((s) => { + return !s; + }, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); const { user, isAuthenticated } = useAuth0(); @@ -54,25 +72,24 @@ const PageLayout: React.FC = () => { setIsRightExpanded(false); } }; - + const isYoutubeOnly = useMemo( + () => APP_SOURCES.includes('youtube') && !APP_SOURCES.includes('wiki') && !APP_SOURCES.includes('web'), + [] + ); + const isWikipediaOnly = useMemo( + () => APP_SOURCES.includes('wiki') && !APP_SOURCES.includes('youtube') && !APP_SOURCES.includes('web'), + [] + ); + const isWebOnly = useMemo( + () => APP_SOURCES.includes('web') && !APP_SOURCES.includes('youtube') && !APP_SOURCES.includes('wiki'), + [] + ); const { messages, setClearHistoryData, clearHistoryData, setMessages, setIsDeleteChatLoading } = useMessageContext(); const { setShowTextFromSchemaDialog, showTextFromSchemaDialog } = useFileContext(); - const { - setConnectionStatus, - setGdsActive, - setIsBackendConnected, - setUserCredentials, - setErrorMessage, - setShowDisconnectButton, - showDisconnectButton, - setIsGCSActive, - setChunksToBeProces, - } = useCredentials(); const { cancel } = useSpeechSynthesis(); useEffect(() => { async function initializeConnection() { - const session = localStorage.getItem('neo4j.connection'); // Fetch backend health status try { const response = await healthStatus(); @@ -85,128 +102,54 @@ const PageLayout: React.FC = () => { setShowDisconnectButton(isModalOpen); localStorage.setItem('disconnectButtonState', isModalOpen ? 'true' : 'false'); }; - const setUserCredentialsLocally = (credentials: any) => { - setUserCredentials(credentials); - createDefaultFormData(credentials); - setIsGCSActive(credentials.isGCSActive ?? 
false); - setGdsActive(credentials.isgdsActive); - setIsReadOnlyUser(credentials.isReadonlyUser); - setChunksToBeProces(credentials.chunksTobeProcess); - localStorage.setItem( - 'neo4j.connection', - JSON.stringify({ - uri: credentials.uri, - user: credentials.userName, - password: btoa(credentials.password), - database: credentials.database, - userDbVectorIndex: 384, - isReadOnlyUser: credentials.isReadonlyUser, - isgdsActive: credentials.isgdsActive, - isGCSActive: credentials.isGCSActive, - chunksTobeProcess: credentials.chunksTobeProcess, - email: credentials.email, - }) - ); - }; - const parseSessionAndSetCredentials = (neo4jConnection: string) => { - if (!neo4jConnection) { - console.error('Invalid session data:', neo4jConnection); - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - return; - } - try { - const parsedConnection = JSON.parse(neo4jConnection); - if (parsedConnection.uri && parsedConnection.user && parsedConnection.password && parsedConnection.database) { - const credentials = { - uri: parsedConnection.uri, - userName: parsedConnection.user, - password: atob(parsedConnection.password), - database: parsedConnection.database, - email: parsedConnection.email, - }; - setUserCredentials(credentials); - createDefaultFormData(credentials); - setGdsActive(parsedConnection.isgdsActive); - setIsReadOnlyUser(parsedConnection.isReadOnlyUser); - setIsGCSActive(parsedConnection.isGCSActive); - } else { - console.error('Invalid parsed session data:', parsedConnection); - } - } catch (error) { - console.error('Failed to parse session data:', error); - } - }; - // To update credentials if environment values differ - const updateSessionIfNeeded = (envCredentials: any, storedSession: string) => { - try { - const storedCredentials = JSON.parse(storedSession); - const isDiffCreds = - envCredentials.uri !== storedCredentials.uri || - envCredentials.userName !== storedCredentials.user || - btoa(envCredentials.password) !== storedCredentials.password || - envCredentials.database !== storedCredentials.database; - if (isDiffCreds) { - setUserCredentialsLocally(envCredentials); - setClearHistoryData(true); - return true; - } - return false; - } catch (error) { - console.error('Failed to update session:', error); - return false; - } - }; - // Handle connection initialization - let backendApiResponse; try { - backendApiResponse = await envConnectionAPI(); + const backendApiResponse = await envConnectionAPI(); const connectionData = backendApiResponse.data; if (connectionData.data && connectionData.status === 'Success') { - const envCredentials = { + const credentials = { uri: connectionData.data.uri, - password: atob(connectionData.data.password), - userName: connectionData.data.user_name, - database: connectionData.data.database, isReadonlyUser: !connectionData.data.write_access, isgdsActive: connectionData.data.gds_status, - isGCSActive: connectionData?.data?.gcs_file_cache === 'True', + isGCSActive: connectionData.data.gcs_file_cache === 'True', chunksTobeProcess: parseInt(connectionData.data.chunk_to_be_created), email: user?.email ?? 
'', + connection: 'backendApi', }; - setChunksToBeProces(envCredentials.chunksTobeProcess); - setIsGCSActive(envCredentials.isGCSActive); - if (session) { - const updated = updateSessionIfNeeded(envCredentials, session); - if (!updated) { - parseSessionAndSetCredentials(session); - } - setConnectionStatus(Boolean(connectionData.data.graph_connection)); - setIsBackendConnected(true); - } else { - setUserCredentialsLocally(envCredentials); - setConnectionStatus(true); - } + //setChunksToBeProces(credentials.chunksTobeProcess); + setIsGCSActive(credentials.isGCSActive); + setUserCredentials(credentials); + createDefaultFormData({ uri: credentials.uri, email: credentials.email ?? '' }); + setGdsActive(credentials.isgdsActive); + setConnectionStatus(Boolean(connectionData.data.graph_connection)); + setIsReadOnlyUser(connectionData.data.isReadonlyUser); handleDisconnectButtonState(false); - } else { - if (session) { - parseSessionAndSetCredentials(session); - setConnectionStatus(true); + } + else if (!connectionData.data && connectionData.status === 'Success') { + const storedCredentials = localStorage.getItem('neo4j.connection'); + if (storedCredentials) { + const credentials = JSON.parse(storedCredentials); + setUserCredentials({ ...credentials, password: atob(credentials.password) }); + createDefaultFormData({ ...credentials, password: atob(credentials.password) }); + //setChunksToBeProces(credentials.chunksTobeProcess); + setIsGCSActive(credentials.isGCSActive); + setGdsActive(credentials.isgdsActive); + setConnectionStatus(Boolean(credentials.connection === 'connectAPI')); + setIsReadOnlyUser(credentials.isReadonlyUser); + handleDisconnectButtonState(true); } else { - setErrorMessage(backendApiResponse?.data?.error); setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + handleDisconnectButtonState(true); - } - handleDisconnectButtonState(true); - } - } catch (error) { - console.error('Error during backend API call:', error); - if (session) { - parseSessionAndSetCredentials(session); - setConnectionStatus(true); } else { setErrorMessage(backendApiResponse?.data?.error); setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + handleDisconnectButtonState(true); + } + } catch (error) { + if (error instanceof Error) { + showErrorToast(error.message); } - handleDisconnectButtonState(true); } } initializeConnection(); @@ -218,7 +161,6 @@ { setIsDeleteChatLoading(true); cancel(); const response = await clearChatAPI( - userCredentials as UserCredentials, sessionStorage.getItem('session_id') ?? '' ); setIsDeleteChatLoading(false); @@ -274,9 +216,8 @@ > {isLargeDesktop ? (
{
) : ( <> - - + }> + + + }> + + + }> + +
-
+
+
- - Turn unstructured information into to rich insightful Knowledge Graph + It seems like you haven't ingested any data yet. To begin building your knowledge graph, you'll need to log + in to the main application.
diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 6e73da3a4..173da1e2d 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -52,7 +52,7 @@ export default function ConnectionModal({ errorMessage, setIsGCSActive, setShowDisconnectButton, - setChunksToBeProces, + // setChunksToBeProces, } = useCredentials(); const [isLoading, setIsLoading] = useState(false); const [searchParams, setSearchParams] = useSearchParams(); @@ -96,8 +96,8 @@ export default function ConnectionModal({ 'neo4j.connection', JSON.stringify({ uri: usercredential?.uri, - user: usercredential?.userName, - password: btoa(usercredential?.password), + userName: usercredential?.userName, + password: btoa(usercredential.password ?? ''), database: usercredential?.database, userDbVectorIndex: 384, }) @@ -241,12 +241,12 @@ export default function ConnectionModal({ setIsGCSActive(isGCSActive); setGdsActive(isgdsActive); setIsReadOnlyUser(isReadOnlyUser); - setChunksToBeProces(chunksTobeProcess); + // setChunksToBeProces(chunksTobeProcess); localStorage.setItem( 'neo4j.connection', JSON.stringify({ uri: connectionURI, - user: username, + userName: username, password: btoa(password), database: database, userDbVectorIndex, @@ -255,6 +255,7 @@ export default function ConnectionModal({ isGCSActive, chunksTobeProcess, email: user?.email ?? '', + connection:'connectAPI', }) ); setUserDbVectorIndex(response.data.data.db_vector_dimension); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/AdditionalInstructions/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/AdditionalInstructions/index.tsx index 89cbe5d5b..bec693dd8 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/AdditionalInstructions/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/AdditionalInstructions/index.tsx @@ -1,10 +1,19 @@ -import { Flex, TextArea, Typography, useMediaQuery } from '@neo4j-ndl/react'; -import { buttonCaptions } from '../../../../utils/Constants'; +import { Flex, Select, TextArea, Typography, useMediaQuery } from '@neo4j-ndl/react'; +import { + appLabels, + buttonCaptions, + defaultChunkOverlapOptions, + defaultTokenChunkSizeOptions, + defaultChunksToCombineOptions, + tooltips, +} from '../../../../utils/Constants'; import { tokens } from '@neo4j-ndl/base'; import ButtonWithToolTip from '../../../UI/ButtonWithToolTip'; import { useCallback } from 'react'; import { useFileContext } from '../../../../context/UsersFiles'; import { showNormalToast } from '../../../../utils/toasts'; +import { OnChangeValue } from 'react-select'; +import { OptionType } from '../../../../types'; export default function AdditionalInstructionsText({ closeEnhanceGraphSchemaDialog, @@ -13,52 +22,154 @@ export default function AdditionalInstructionsText({ }) { const { breakpoints } = tokens; const tablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); - const { additionalInstructions, setAdditionalInstructions } = useFileContext(); + const { + additionalInstructions, + setAdditionalInstructions, + setSelectedTokenChunkSize, + setSelectedChunk_overlap, + selectedTokenChunkSize, + selectedChunk_overlap, + selectedChunks_to_combine, + setSelectedChunks_to_combine, + } = useFileContext(); const clickAnalyzeInstructHandler = useCallback(async () => { localStorage.setItem('instructions', 
additionalInstructions); closeEnhanceGraphSchemaDialog(); showNormalToast(`Successfully Applied the Instructions`); }, [additionalInstructions]); + const onChangeChunk_size = (newValue: OnChangeValue) => { + if (newValue !== null) { + const parsedValue = Number(newValue.value); + if (isNaN(parsedValue)) { + showNormalToast('Chunk size must be a valid number'); + return; + } + setSelectedTokenChunkSize(parsedValue); + localStorage.setItem('selectedTokenChunkSize', JSON.stringify({ selectedOption: parsedValue })); + } + }; + const onChangeChunk_overlap = (newValue: OnChangeValue) => { + if (newValue !== null) { + const parsedValue = Number(newValue.value); + if (isNaN(parsedValue)) { + showNormalToast('Chunk overlap must be a valid number'); + return; + } + setSelectedChunk_overlap(parsedValue); + localStorage.setItem('selectedChunk_overlap', JSON.stringify({ selectedOption: parsedValue })); + } + }; + const onChangeChunks_to_combine = (newValue: OnChangeValue) => { + if (newValue !== null) { + const parsedValue = Number(newValue.value); + if (isNaN(parsedValue)) { + showNormalToast('Chunks to combine must be a valid number'); + return; + } + setSelectedChunks_to_combine(parsedValue); + localStorage.setItem('selectedChunks_to_combine', JSON.stringify({ selectedOption: parsedValue })); + } + }; return ( - -
- - - - {buttonCaptions.provideAdditionalInstructions} - - - -