 import os
 import logging
 import time
-from typing import Dict, Tuple, Optional
-import boto3
+from src.llm import get_llm
 from datasets import Dataset
 from dotenv import load_dotenv
-from langchain_anthropic import ChatAnthropic
-from langchain_aws import ChatBedrock
-from langchain_community.chat_models import ChatOllama
-from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer
-from langchain_fireworks import ChatFireworks
-from langchain_google_vertexai import (
-    ChatVertexAI,
-    HarmBlockThreshold,
-    HarmCategory,
-)
-from langchain_groq import ChatGroq
-from langchain_openai import AzureChatOpenAI, ChatOpenAI
 from ragas import evaluate
-from ragas.metrics import answer_relevancy, context_utilization, faithfulness
+from ragas.metrics import answer_relevancy, faithfulness
 from src.shared.common_fn import load_embedding_model
-
 load_dotenv()
 
-RAGAS_MODEL_VERSIONS = {
-    "openai_gpt_3.5": "gpt-3.5-turbo-16k",
-    "openai_gpt_4": "gpt-4-turbo-2024-04-09",
-    "openai_gpt_4o_mini": "gpt-4o-mini-2024-07-18",
-    "openai_gpt_4o": "gpt-4o-mini-2024-07-18",
-    "groq_llama3_70b": "groq_llama3_70b",
-}
-EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
+EMBEDDING_MODEL = os.getenv("RAGAS_EMBEDDING_MODEL")
 EMBEDDING_FUNCTION, _ = load_embedding_model(EMBEDDING_MODEL)
 
-
-def get_ragas_llm(model: str) -> Tuple[object, str]:
-    """Retrieves the specified language model. Improved error handling and structure."""
-    env_key = f"LLM_MODEL_CONFIG_{model}"
-    env_value = os.environ.get(env_key)
-    logging.info(f"Loading model configuration: {env_key}")
-    try:
-        if "openai" in model:
-            model_name = RAGAS_MODEL_VERSIONS[model]
-            llm = ChatOpenAI(
-                api_key=os.environ.get("OPENAI_API_KEY"), model=model_name, temperature=0
-            )
-        elif "groq" in model:
-            model_name, base_url, api_key = env_value.split(",")
-            llm = ChatGroq(api_key=api_key, model_name=model_name, temperature=0)
-        else:
-            raise ValueError(f"Unsupported model for evaluation: {model}")
-
-        logging.info(f"Model loaded - Model Version: {model}")
-        return llm, model_name
-    except (ValueError, KeyError) as e:
-        logging.error(f"Error loading LLM: {e}")
-        raise
-
-
-def get_ragas_metrics(
-    question: str, context: str, answer: str, model: str
-) -> Optional[Dict[str, float]]:
+def get_ragas_metrics(question: str, context: list, answer: list, model: str):
     """Calculates RAGAS metrics."""
     try:
         start_time = time.time()
         dataset = Dataset.from_dict(
-            {"question": [question], "answer": [answer], "contexts": [[context]]}
+            {"question": [question] * len(answer), "answer": answer, "contexts": [[ctx] for ctx in context]}
         )
-        logging.info("Dataset created successfully.")
-
-        llm, model_name = get_ragas_llm(model=model)
+        logging.info("Evaluation dataset created successfully.")
+        if ("diffbot" in model) or ("ollama" in model):
+            raise ValueError(f"Unsupported model for evaluation: {model}")
+        else:
+            llm, model_name = get_llm(model=model)
+
         logging.info(f"Evaluating with model: {model_name}")
-
+
         score = evaluate(
             dataset=dataset,
-            metrics=[faithfulness, answer_relevancy, context_utilization],
+            metrics=[faithfulness, answer_relevancy],
             llm=llm,
             embeddings=EMBEDDING_FUNCTION,
         )
-
+
         score_dict = (
-            score.to_pandas()[["faithfulness", "answer_relevancy", "context_utilization"]]
+            score.to_pandas()[["faithfulness", "answer_relevancy"]]
+            .fillna(0)
             .round(4)
-            .to_dict(orient="records")[0]
+            .to_dict(orient="list")
         )
         end_time = time.time()
         logging.info(f"Evaluation completed in: {end_time - start_time:.2f} seconds")
         return score_dict
     except ValueError as e:
         if "Unsupported model for evaluation" in str(e):
             logging.error(f"Unsupported model error: {e}")
-            return {"error": str(e)}  # Return the specific error message as a dictionary
+            return {"error": str(e)}
         logging.exception(f"ValueError during metrics evaluation: {e}")
         return {"error": str(e)}
     except Exception as e:
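With this change, get_ragas_metrics scores several candidate answers against one question in a single call instead of one answer at a time. Below is a minimal sketch of how the reworked function might be invoked, assuming the file lives at src/ragas_eval.py and that a model key such as "openai_gpt_4o" is configured for get_llm; the question, answers, and contexts are made-up illustrative values, not part of this commit.

# Sketch only: the module path, model key, and all sample strings are assumptions.
from src.ragas_eval import get_ragas_metrics  # assumed location of this file

question = "Who founded Neo4j?"
# One generated answer per chat mode being compared, plus the context each one retrieved.
answers = [
    "Neo4j was founded by Emil Eifrem.",
    "Emil Eifrem is the founder of Neo4j.",
]
contexts = [
    "Emil Eifrem founded Neo4j in 2007.",
    "Neo4j is a graph database company started by Emil Eifrem.",
]

scores = get_ragas_metrics(question, contexts, answers, model="openai_gpt_4o")
# to_dict(orient="list") returns one score per answer, e.g.
# {"faithfulness": [1.0, 0.9], "answer_relevancy": [0.98, 0.97]}
print(scores)

Since the question is repeated len(answer) times and each context is wrapped one-per-row, answer and context appear to be expected as parallel lists of equal length; Dataset.from_dict would reject columns of different lengths.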