diff --git a/backend/Dockerfile b/backend/Dockerfile
index c36f8ce2e..4488129d9 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -16,9 +16,10 @@ RUN apt-get update && \
 # Set LD_LIBRARY_PATH
 ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
 # Copy requirements file and install Python dependencies
-COPY requirements.txt /code/
+COPY requirements.txt constraints.txt /code/
 # --no-cache-dir --upgrade
-RUN pip install -r requirements.txt
+RUN pip install --upgrade pip
+RUN pip install -r requirements.txt -c constraints.txt
 # Copy application code
 COPY . /code
 # Set command
diff --git a/backend/constraints.txt b/backend/constraints.txt
new file mode 100644
index 000000000..2c785f6de
--- /dev/null
+++ b/backend/constraints.txt
@@ -0,0 +1,4 @@
+-f https://download.pytorch.org/whl/torch_stable.html
+torch==2.3.1+cpu
+torchvision==0.18.1+cpu
+torchaudio==2.3.1+cpu
\ No newline at end of file
diff --git a/backend/score.py b/backend/score.py
index 2318c22e8..e2a6c4e51 100644
--- a/backend/score.py
+++ b/backend/score.py
@@ -31,11 +31,10 @@ from src.ragas_eval import *
 from starlette.types import ASGIApp, Receive, Scope, Send
 from langchain_neo4j import Neo4jGraph
-from src.entities.source_node import sourceNode
 from starlette.middleware.sessions import SessionMiddleware
-from starlette.responses import HTMLResponse, RedirectResponse,JSONResponse
 from starlette.requests import Request
-import secrets
+from dotenv import load_dotenv
+load_dotenv(override=True)

 logger = CustomLogger()
 CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks")
diff --git a/backend/src/llm.py b/backend/src/llm.py
index b83773f42..ef835b560 100644
--- a/backend/src/llm.py
+++ b/backend/src/llm.py
@@ -180,13 +180,16 @@ async def get_graph_document_list(
     else:
         node_properties = ["description"]
         relationship_properties = ["description"]
+    TOOL_SUPPORTED_MODELS = {"qwen3", "deepseek"}
+    model_name = llm.model_name.lower()
+    ignore_tool_usage = not any(pattern in model_name for pattern in TOOL_SUPPORTED_MODELS)
     llm_transformer = LLMGraphTransformer(
         llm=llm,
         node_properties=node_properties,
         relationship_properties=relationship_properties,
         allowed_nodes=allowedNodes,
         allowed_relationships=allowedRelationship,
-        ignore_tool_usage=True,
+        ignore_tool_usage=ignore_tool_usage,
         additional_instructions=ADDITIONAL_INSTRUCTIONS+ (additional_instructions if additional_instructions else "")
     )
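The llm.py hunk above replaces the hard-coded ignore_tool_usage=True with a substring check against the model name, so tool-calling extraction is only enabled for models whose names match a known pattern. A minimal sketch of that gating logic follows; the helper name should_ignore_tool_usage is illustrative and not part of the PR.

# Sketch of the model-name gating added in backend/src/llm.py.
# TOOL_SUPPORTED_MODELS mirrors the set introduced by the diff.
TOOL_SUPPORTED_MODELS = {"qwen3", "deepseek"}

def should_ignore_tool_usage(model_name: str) -> bool:
    """True when the model is not on the tool-supported list, i.e. the
    LLMGraphTransformer should fall back to prompt-based extraction."""
    name = model_name.lower()
    return not any(pattern in name for pattern in TOOL_SUPPORTED_MODELS)

# e.g. should_ignore_tool_usage("openai_gpt_4o") -> True  (tools ignored)
#      should_ignore_tool_usage("deepseek-r1")   -> False (tools used)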
diff --git a/backend/src/main.py b/backend/src/main.py
index c2f2a80f3..4bdb6ba51 100644
--- a/backend/src/main.py
+++ b/backend/src/main.py
@@ -665,39 +665,40 @@ def upload_file(graph, model, chunk, chunk_number:int, total_chunks:int, origina
     return f"Chunk {chunk_number}/{total_chunks} saved"

 def get_labels_and_relationtypes(uri, userName, password, database):
-    excluded_labels = {'Document', 'Chunk', '_Bloom_Perspective_', '__Community__', '__Entity__', 'Session', 'Message'}
-    excluded_relationships = {
-        'PART_OF', 'NEXT_CHUNK', 'HAS_ENTITY', '_Bloom_Perspective_', 'FIRST_CHUNK',
-        'SIMILAR', 'IN_COMMUNITY', 'PARENT_COMMUNITY', 'NEXT', 'LAST_MESSAGE'}
-    driver = get_graphDB_driver(uri, userName, password,database)
-    with driver.session(database=database) as session:
-        result = session.run("CALL db.schema.visualization() YIELD nodes, relationships RETURN nodes, relationships")
-        if not result:
-            return []
-        record = result.single()
-        nodes = record["nodes"]
-        relationships = record["relationships"]
-        node_map = {}
-        for node in nodes:
-            node_id = node.element_id
-            labels = list(node.labels)
-            if labels:
-                node_map[node_id] = ":".join(labels)
-        triples = []
-        for rel in relationships:
-            start_id = rel.start_node.element_id
-            end_id = rel.end_node.element_id
-            rel_type = rel.type
-            start_label = node_map.get(start_id)
-            end_label = node_map.get(end_id)
-            if start_label and end_label:
-                if (
-                    start_label not in excluded_labels and
-                    end_label not in excluded_labels and
-                    rel_type not in excluded_relationships
-                ):
-                    triples.append(f"{start_label}-{rel_type}->{end_label}")
-        return {"triplets" : list(set(triples))}
+    excluded_labels = {'Document', 'Chunk', '_Bloom_Perspective_', '__Community__', '__Entity__', 'Session', 'Message'}
+    excluded_relationships = {
+        'NEXT_CHUNK', '_Bloom_Perspective_', 'FIRST_CHUNK',
+        'SIMILAR', 'IN_COMMUNITY', 'PARENT_COMMUNITY', 'NEXT', 'LAST_MESSAGE'
+    }
+    driver = get_graphDB_driver(uri, userName, password,database)
+    triples = set()
+    with driver.session(database=database) as session:
+        result = session.run("""
+            MATCH (n)-[r]->(m)
+            RETURN DISTINCT labels(n) AS fromLabels, type(r) AS relType, labels(m) AS toLabels
+        """)
+        for record in result:
+            from_labels = record["fromLabels"]
+            to_labels = record["toLabels"]
+            rel_type = record["relType"]
+            from_label = next((lbl for lbl in from_labels if lbl not in excluded_labels), None)
+            to_label = next((lbl for lbl in to_labels if lbl not in excluded_labels), None)
+            if not from_label or not to_label:
+                continue
+            if rel_type == 'PART_OF':
+                if from_label == 'Chunk' and to_label == 'Document':
+                    continue
+            elif rel_type == 'HAS_ENTITY':
+                if from_label == 'Chunk':
+                    continue
+            elif (
+                from_label in excluded_labels or
+                to_label in excluded_labels or
+                rel_type in excluded_relationships
+            ):
+                continue
+            triples.add(f"{from_label}-{rel_type}->{to_label}")
+    return {"triplets": list(triples)}

 def manually_cancelled_job(graph, filenames, source_types, merged_dir, uri):
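The rewritten get_labels_and_relationtypes above stops relying on db.schema.visualization() and instead samples the label/relationship combinations that actually exist in the data. A self-contained sketch of the same idea using the plain neo4j driver follows; connection values are placeholders, and the PART_OF / HAS_ENTITY special cases from the PR are omitted for brevity.

# Illustrative only: query distinct (label)-[type]->(label) triples and drop
# internal labels; the real function also special-cases PART_OF and HAS_ENTITY.
from neo4j import GraphDatabase

EXCLUDED_LABELS = {"Document", "Chunk", "_Bloom_Perspective_", "__Community__",
                   "__Entity__", "Session", "Message"}

def sample_schema_triplets(uri, user, password, database):
    triples = set()
    with GraphDatabase.driver(uri, auth=(user, password)) as driver:
        with driver.session(database=database) as session:
            result = session.run(
                "MATCH (n)-[r]->(m) "
                "RETURN DISTINCT labels(n) AS fromLabels, type(r) AS relType, labels(m) AS toLabels"
            )
            for record in result:
                from_label = next((l for l in record["fromLabels"] if l not in EXCLUDED_LABELS), None)
                to_label = next((l for l in record["toLabels"] if l not in EXCLUDED_LABELS), None)
                if from_label and to_label:
                    triples.add(f"{from_label}-{record['relType']}->{to_label}")
    return sorted(triples)

# Usage (placeholder credentials):
# print(sample_schema_triplets("neo4j://localhost:7687", "neo4j", "password", "neo4j"))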
params={"batch_data": batch_data}) query_to_create_FIRST_relation = """ UNWIND $relationships AS relationship @@ -137,6 +141,7 @@ def create_relation_between_chunks(graph, file_name, chunks: List[Document])->li MERGE (d)-[:FIRST_CHUNK]->(c)) """ execute_graph_query(graph,query_to_create_FIRST_relation, params={"f_name": file_name, "relationships": relationships}) + execute_graph_query(graph,query_to_create_FIRST_relation, params={"f_name": file_name, "relationships": relationships}) query_to_create_NEXT_CHUNK_relation = """ UNWIND $relationships AS relationship @@ -153,7 +158,7 @@ def create_relation_between_chunks(graph, file_name, chunks: List[Document])->li def create_chunk_vector_index(graph): start_time = time.time() try: - vector_index_query = "SHOW INDEXES YIELD * WHERE labelsOrTypes = ['Chunk'] and type = 'VECTOR' AND name = 'vector' return options" + vector_index_query = "SHOW INDEXES YIELD name, type, labelsOrTypes, properties WHERE name = 'vector' AND type = 'VECTOR' AND 'Chunk' IN labelsOrTypes AND 'embedding' IN properties RETURN name" vector_index = execute_graph_query(graph,vector_index_query) if not vector_index: vector_store = Neo4jVector(embedding=EMBEDDING_FUNCTION, @@ -168,7 +173,7 @@ def create_chunk_vector_index(graph): else: logging.info(f"Index already exist,Skipping creation. Time taken: {time.time() - start_time:.2f} seconds") except Exception as e: - if "EquivalentSchemaRuleAlreadyExists" in str(e): + if ("EquivalentSchemaRuleAlreadyExists" in str(e) or "An equivalent index already exists" in str(e)): logging.info("Vector index already exists, skipping creation.") else: raise \ No newline at end of file diff --git a/frontend/index.html b/frontend/index.html index 85618e155..c669468ee 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -1,16 +1,20 @@ -
diff --git a/frontend/index.html b/frontend/index.html
index 85618e155..c669468ee 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -1,16 +1,20 @@