From f46ebc2dd240aa2904faf165823c8e59f67efd05 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Wed, 12 Feb 2025 16:40:46 +0530 Subject: [PATCH 1/6] Dev to staging (#1070) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Read only mode for unauthenticated users (#1046) * llm name changes * build fix * default mode fix * ragas model names update * lint fixes * Chunk Entities API condition * added the tooltip for unsupported llms for ragas metric loading * removed unused imports * multimode fix when we get error response * mode changes for score display * fix: Fixed the details state handling between multiple chats; feature: Added the warning banner if the selected llm model is not supported for ragas evaluation * Fix: Entity Mode Width Fix * diffbot fix for async (#797) * Minor changes (#798) * added config variable for default diffbot chat model * fulltext index creation is skipped when the labels are empty * entity vector change * added optional to communities for entity mode * updated the entity query --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * New: Added the supported llm models for ragas evaluation * Fix: Communities Tab is displayed based on communities length * added the conversation download button (#800) * model name correction * chat mode switch fix * Add API payload GCP logging (#805) * Adding Links to get neighboring nodes (#796) * addition of link * added neighbours query * implemented with driver * updated the query * communitiesInfo name change * communities.tsx removed * api integration * modified response * entities change * chunk and communities * chunk space removal * added element id to chunks * loading on click * format changes * added file name for Document node * chat token cut off model name update * icon change * duplicate sources removal * Entity change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * added error message for doc retriever (#807) * copy row (#803) * copy row * column for copy * column copy * Ragas Evaluation For Multi Modes (#806; see the evaluation sketch below) * Updated models for ragas eval * context utilization metrics removed * updated supported llms for ragas * removed context utilization * Implemented Parallel API * multi api calls error resolved * MultiMode Metrics * Fix: Metric Evaluation For Single Mode * multi modes ragas evaluation * api payload changes * metric api output format changed * multi mode ragas changes * removed pre process dataset * api response changes * Multimode metrics api integration * nan error for no answer resolved * QA integration changes --------- Co-authored-by: kaustubh-darekar * lint fixes * fix: multimode metrics state handling; fix: lint fixes * fix: Multimode metrics mode change state issue; fix: chunk list style issue * fix: list style fix * Correct typo mistake * added new env for ragas embedding model * Props name changes (#811) * Props name changes * removed the access token from row on copy action * props changes for dropzone component * graph view changes --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * test * view graph * nodes count and relationship count update fix * sourceUrl Fix * empty string "" fix: to keep the default values, the value should be left blank instead of "" * prop changes * props changes * retry condition update for failed files (#820)
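A minimal sketch of the multi-mode ragas scoring described in #806, assuming ragas 0.1.x-style APIs and an OpenAI key in the environment; the mode names and payload shape are illustrative, not the project's actual request format:

    # Score each chat mode's answer independently with ragas.
    # Assumes ragas 0.1.x and OPENAI_API_KEY set for the default judge LLM.
    from datasets import Dataset
    from ragas import evaluate
    from ragas.metrics import answer_relevancy, faithfulness

    def score_modes(question, contexts, answers_by_mode):
        results = {}
        for mode, answer in answers_by_mode.items():
            ds = Dataset.from_dict({
                "question": [question],
                "answer": [answer],
                "contexts": [contexts],  # list of retrieved chunk texts
            })
            results[mode] = evaluate(ds, metrics=[faithfulness, answer_relevancy])
        return results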
* Chat modes name changes (#815) * Props name changes * removed the access token from row on copy action * updated chat mode names * Chat Modes Name Changes * lint fixes * using readable format in UI * removal of size to avoid console warning * key add --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * Youtube transcript fix with proxy (#822) * update script for async func * ragas changes for graph retrieval mode; context added in api output (#825) * Remove extract latency from logging and add LIMIT in duplicate nodes * Document updates (#828) * document updated with ragas evaluation information * formatting changes * chatbot api documentation updated * api details added in document * function name changed for drop create vector index api * Update README.md * updated api structure in docs (#827) * Update backend_docs.adoc * 821 llm model listing (#823) * added logic for document filters * LLM models * message change * link added * removed the text --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Exclude session label node from duplicate nodes list * Added the tooltip for disabled llm option (#835) * node size changes * mode removal of rows check * formatting * Exclude __Entity__ node label from duplicate node list * Update README.md * Update README.md * Update README.md * Update README.md * fixed the youtube link * Security header and GZIPMiddleware (#847; see the middleware sketch below) * Added security headers to all APIs * Add GZipMiddleware * Chunk Text Details (#850) * Community title added * Added api for fetching chunk text details * output format changed for chunk text * integrated the service layer for chunk data * added the chunks * formatting output of llm call for title generation * formatting llm output for title generation * added flex row * Changes related to pagination of fetch chunk api * Integrated the pagination * page changes error resolved for fetch chunk api * for get neighbours api, community title added in properties * moving community title related changes to separate branch * Removed Query module from fastapi import statement * icon changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Communities Id to Title (#851) * Staging to main (#735) * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env, updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the submit button on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added the removable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de-duplication * reset on merging --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
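A sketch of the #847 change as described above: FastAPI's GZipMiddleware plus security headers on every response. GZipMiddleware is FastAPI's real API; the exact header set shown here is a typical choice, not necessarily the one shipped:

    from fastapi import FastAPI, Request
    from fastapi.middleware.gzip import GZipMiddleware

    app = FastAPI()
    # Compress response bodies above ~1 KB.
    app.add_middleware(GZipMiddleware, minimum_size=1000)

    @app.middleware("http")
    async def add_security_headers(request: Request, call_next):
        # Attach security headers to every API response.
        response = await call_next(request)
        response.headers["X-Content-Type-Options"] = "nosniff"
        response.headers["X-Frame-Options"] = "DENY"
        response.headers["Cache-Control"] = "no-store"
        return response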
* Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflicts fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed: Nodes, Relationship Id and Type None or Blank * added the tooltips * type fix * Unnecessary import * added score threshold and added some error handling (#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool (see the sketch below) * added the default page size * Issue fixed: Processed chunks shown as 0 when file is re-processed * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on upload API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * file selection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges
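A sketch of the is_cancelled fix noted above: the flag may arrive from the DB as the string "True"/"true" rather than a boolean, so it is normalised before comparison (function name is illustrative):

    def to_bool(value) -> bool:
        # Accept real booleans as-is; treat only the string "true"
        # (any casing, surrounding whitespace) as True.
        if isinstance(value, bool):
            return value
        return str(value).strip().lower() == "true"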
* Save Total Pages in DB * Added total pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon according to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on the basis of total pages * deleted chunks * polling based on total pages * isNaN check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * create new endpoint populate_graph_schema and update the query for getting labels from DB (see the label-listing sketch below) * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condition * removed llms * Fixed issue: Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * markdown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification; fixed that
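A sketch of the label query behind #399's populate_graph_schema endpoint: db.labels() is a built-in Neo4j procedure for listing existing node labels; the connection details here are placeholders:

    from neo4j import GraphDatabase

    driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "password"))
    with driver.session() as session:
        # Fetch every node label currently present in the database.
        labels = [record["label"]
                  for record in session.run("CALL db.labels() YIELD label RETURN label")]
    driver.close()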
* Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417; see the sketch below) * gcs file content read via storage client * added the access token to the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the tooltip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrieval bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrollbar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
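A sketch of #417 as described above: reading file content directly with the google-cloud-storage client rather than through a loader. The client calls are the library's real API; bucket and blob names are placeholders:

    from google.cloud import storage

    client = storage.Client()  # uses application default credentials
    bucket = client.bucket("my-upload-bucket")
    # Download the raw bytes of the object for downstream parsing.
    content = bucket.blob("path/to/file.pdf").download_as_bytes()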
* Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433)
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- 
--------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue fixed from backend for upload API and graph_document when model name mismatch * fixed word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed; file delete from GCS or local based on env variable * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement folder name hashing in GCS bucket upload (see the sketch below) * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454)
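A sketch of #453's hashed folder prefix for GCS uploads; SHA-1 is an assumed choice here (any stable digest works), and the names are illustrative:

    import hashlib

    def hashed_folder_name(name: str) -> str:
        # Stable, filesystem-safe prefix derived from the original folder name.
        return hashlib.sha1(name.encode("utf-8")).hexdigest()

    object_name = f"{hashed_folder_name('user-folder')}/report.pdf"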
* Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461)
--------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462)
--------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466)
* Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification; fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added… * langchain updates (#1048) * testing script changed for better logging errors and results of various apis (#1050) * Deepseek models integration (#1051) * New models added: deepseek v3 and r1 * models added in drop down * fixed top-line of drop-area (#1049) * Schema viz (#1035) * Graph enhancements (#795) * graph changes * graph properties changes * graph communities changes * graph type selection * checkbox check changes * format changes * Communities Bug fixes (#775) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modified query * labels change for nodes * payload changes * added node details properties * payload new changes * communities check * communities selection check * enable communities * removed the selected prop * enable communities label change * communities name change * cred check * tooltip * fix: Copy Icon Theme Fix --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Dev to Staging (#738) * added communities creation * added communities * connection _check * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495)
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token to the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) (see the sketch below) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * tooltips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the tooltip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrieval bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrollbar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
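The #417/#420 changes above (reading GCS file content via the storage client, then parsing with pypdf2) roughly combine as follows; a sketch assuming the google-cloud-storage and PyPDF2 packages, with bucket and blob names as placeholders:

    from io import BytesIO

    from google.cloud import storage
    from PyPDF2 import PdfReader

    def read_pdf_from_gcs(bucket_name: str, blob_name: str) -> str:
        # fetch the raw bytes through the authenticated storage client
        client = storage.Client()
        blob = client.bucket(bucket_name).blob(blob_name)
        data = blob.download_as_bytes()
        # parse the PDF in memory and concatenate the page text
        reader = PdfReader(BytesIO(data))
        return "\n".join(page.extract_text() or "" for page in reader.pages)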
* Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433)
* fixed gcs status message issue * added if check for failed count * Null issue fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed; file deleted from GCS or local based on env variable * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) (see the sketch below) * implement folder name hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice"
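The hashed upload folder from #453 can be approximated as below; illustrative only, since the commit log does not name the hash actually used:

    import hashlib

    def hashed_folder_name(original_folder: str) -> str:
        # derive a stable, opaque folder name for the GCS bucket upload
        return hashlib.sha1(original_folder.encode("utf-8")).hexdigest()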
* DEV to STAGING (#461)
* DEV to STAGING (#462)
* Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the
extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added
the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema
using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx
--------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read
via storage client (#417) * gcs file content read via storage client * added the access token to the file state
--------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files
from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on
page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css
handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change *
added image types * added gcs * type fix * docker changes * speech * added the tooltip for dropzone sources ---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrieval bugs (#421) *
yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space
* UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added
wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed
validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> *
speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by:
vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts *
conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness
changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation
on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit *
added the scrollbar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change
* Total pages in buckets (#431) * page number N/A for buckets * added N/A for gcs and s3 pages * total pages for gcs *
remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile *
Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local
upload issue fixed; delete file from GCS after processing and on failure or cancellation * Add lifecycle rule on
uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes ---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities
used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured
files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed
gcs status message issue * added if check for failed count * Null issue fixed from backend for upload API and
graph_document when model name mismatches * fixed word break issue * Added neo4j-rust-ext * processing time estimation
based on bytes * File extension upper case fixed; file delete from GCS or local based on env variable * timer per byte
* Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement hashed folder
name in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload ---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs
(#455) * Modified chunk query (#454) * Added LibreOffice to fix the error "soffice command was not found. Please
install libreoffice on your system and try again. - Install instructions:
https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian:
https://wiki.debian.org/LibreOffice"
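
That soffice error shows up because unstructured's office-format loaders shell out to LibreOffice's soffice binary;
the image therefore has to ship it. A hedged sketch of an early availability check (hypothetical helper, not the
project's actual code):

    import shutil

    # .doc/.ppt/.xls parsing delegates to LibreOffice's `soffice`; fail early
    # with an actionable message instead of a mid-extraction crash.
    def ensure_soffice() -> None:
        if shutil.which("soffice") is None:
            raise RuntimeError(
                "soffice command was not found. Please install LibreOffice "
                "(e.g. apt-get install -y libreoffice in the Dockerfile) and try again."
            )
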
* Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table' * review comment * Llm format change
(#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes *
removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file
--------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code
(#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the
dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya
<156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey
<156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.git… * updated to new ndl minor version and fixed
sources modal display for small viewport devices * Chunk size overlap config (#1059) * default mode fix * ragas model
names update * lint fixes * Chunk Entities API condition * added the tooltip for unsupported LLMs for ragas metric
loading * removed unused imports * multimode fix when we get error response * mode changes for score display * fix:
Fixed the details state handling between multiple chats; feature: Added a warning banner if the selected LLM model is
not supported for ragas evaluation * Fix: Entity Mode Width Fix * diffbot fix for async (#797) * Minor changes (#798) *
added config variable for default diffbot chat model * fulltext index creation is skipped when the labels are empty *
entity vector change * added optional communities for entity mode * updated the entity query ---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * New: Added the supported llm
models for ragas evaluation * Fix: Communities Tab is displayed based on communities length * added the conversation
download button (#800) * model name correction * chatmode switch mode fix * Add API payload GCP logging (#805) * Adding
Links to get neighboring nodes (#796) * addition of link * added neighbours query * implemented with driver * updated
the query * communitiesInfo name change * communities.tsx removed * api integration * modified response * entities
change * chunk and communities * chunk space removal * added element id to chunks * loading on click * format changes *
added file name for Document node * chat token cut off model name update * icon change * duplicate sources removal *
Entity change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
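
As a sketch of what the "neighbours query implemented with driver" bullets in #796 amount to (the query text,
LIMIT, and function name are illustrative assumptions; only elementId() and the driver API are standard):

    from neo4j import GraphDatabase

    NEIGHBOURS_QUERY = """
    MATCH (n) WHERE elementId(n) = $element_id
    MATCH (n)--(neighbour)
    RETURN neighbour LIMIT 50
    """

    # Fetch nodes adjacent to a clicked chunk/entity by its element id.
    def get_neighbours(uri: str, auth: tuple[str, str], element_id: str):
        with GraphDatabase.driver(uri, auth=auth) as driver:
            records, _, _ = driver.execute_query(NEIGHBOURS_QUERY, element_id=element_id)
            return [record["neighbour"] for record in records]
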
* added error message for doc retriever (#807) * copy row (#803) * copy row * column for copy * column copy * Ragas
Evaluation For Multi Modes (#806) * Updated models for ragas eval * context utilization metrics removed * updated
supported llms for ragas * removed context utilization * Implemented Parallel API * multi api calls error resolved *
MultiMode Metrics * Fix: Metric Evaluation For Single Mode * multi modes ragas evaluation * api payload changes *
metric api output format changed * multi mode ragas changes * removed pre process dataset * api response changes *
Multimode metrics api integration * nan error for no answer resolved * QA integration changes ---------
Co-authored-by: kaustubh-darekar * lint fixes * fix: multimode metrics state handling; fix: lint fixes * fix: Multimode
metrics mode change state issue; fix: chunk list style issue * fix: list style fix * Corrected a typo * added new env
for ragas embedding model * Props name changes (#811) * Props name changes * removed the accesstoken from row on copy
action * props changes for dropzone component * graph view changes --------- Co-authored-by: Prakriti Solankey
<156313631+prakriti-solankey@users.noreply.github.com> * test * view graph * nodes count and relationship count update
fix * sourceUrl Fix * empty string fix: to keep the default values, the value should be left blank instead of "" *
prop changes * props changes * retry condition update for failed files (#820) * Chat modes name changes (#815) * Props
name changes * removed the accesstoken from row on copy action * updated chat mode names * Chat Modes Name Changes *
lint fixes * using readable format in UI * removal of size to avoid console warning * key add ---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti
Solankey <156313631+prakriti-solankey@users.noreply.github.com> * Youtube transcript fix with proxy (#822) * update
script for async func * ragas changes for graph retrieval mode; context added in api output (#825) * Remove extract
latency from logging and add LIMIT in duplicate nodes * Document updates (#828) * document updated with ragas
evaluation information * formatting changes * chatbot api documentation updated * api details added in document *
function name changed for drop create vector index api * Update README.md * updated api structure in docs (#827) *
Update backend_docs.adoc * 821 llm model listing (#823) * added logic for document filters * LLM models * message
change * link added * removed the text --------- Co-authored-by: vasanthasaikalluri
<165021735+vasanthasaikalluri@users.noreply.github.com> * Exclude session label node from duplicate nodes list * Added
the tooltip for disabled llm option (#835) * node size changes * mode removal of rows check * formatting * Exclude
__Entity__ node label from duplicate node list * Update README.md * Update README.md * Update README.md * Update
README.md * fixed the youtube link * Security header and GZIPMiddleware (#847) * Added security header to all APIs *
Add GZipMiddleware
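
For reference, #847 describes the standard FastAPI pattern below. A rough sketch assuming defaults; the specific
headers and the minimum_size threshold are illustrative, not the project's actual values:

    from fastapi import FastAPI, Request
    from fastapi.middleware.gzip import GZipMiddleware

    app = FastAPI()

    # Compress large responses (chunk lists, graph payloads) before sending.
    app.add_middleware(GZipMiddleware, minimum_size=1000)

    # Attach security headers to every API response.
    @app.middleware("http")
    async def add_security_headers(request: Request, call_next):
        response = await call_next(request)
        response.headers["X-Content-Type-Options"] = "nosniff"
        response.headers["X-Frame-Options"] = "DENY"
        response.headers["Cache-Control"] = "no-store"
        return response
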
* Chunk Text Details (#850) * Community title added * Added api for fetching chunk text details * output format changed
for chunk text * integrated the service layer for chunkdata * added the chunks * formatting output of llm call for
title generation * formatting llm output for title generation * added flex row * Changes related to pagination of fetch
chunk api * Integrated the pagination * page changes error resolved for fetch chunk api * for get neighbours api,
community title added in properties * moving community title related changes to separate branch * Removed Query module
from fastapi import statement * icon changes --------- Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> * Communities Id to Title (#851) * Staging to main (#735) * Dev
(#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env, updated
the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the
extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme
changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the
setting modal reopen issue --------- Co-authored-by: Prakriti Solankey
<156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri
<165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the submit button on loading * Deduplication tab
(#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added
the removable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state *
added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after
merging operation * added the description for de-duplication * reset on merging --------- Co-authored-by: Pravesh Kumar
<121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc *
doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image
change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update
frontend_docs.adoc * conflicts fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya
<156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication
query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed:
Nodes, Relationship Id and Type None or Blank * added the tooltips * type fix * removed unnecessary import * added
score threshold and added some error handling (#571) * Update requirements.txt * Tooltip and other UI fixes (#572) *
Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test *
update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle
allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update
file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> *
recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of
link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page
size * Issue fixed: Processed chunks shown as 0 when file is re-processed * Youtube timestamps (#386) * Wikipedia
source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for
extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com>
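
A sketch of the sort of parsing "wikipedia url to support multiple languages" implies: pull the language subdomain
out of any *.wikipedia.org article URL and pass it to the extract API. The helper name and fallback are
hypothetical assumptions, not the project's actual function:

    from urllib.parse import urlparse

    # Accept any Wikipedia article URL and extract (language, title).
    def parse_wikipedia_url(url: str) -> tuple[str, str]:
        parsed = urlparse(url)
        host = parsed.hostname or ""
        if not host.endswith("wikipedia.org"):
            raise ValueError(f"not a Wikipedia URL: {url}")
        language = host.split(".")[0]
        if language in ("www", "wikipedia"):
            language = "en"  # no language subdomain present
        title = parsed.path.rsplit("/", 1)[-1]
        return language, title

    # parse_wikipedia_url("https://de.wikipedia.org/wiki/Neo4j") -> ("de", "Neo4j")
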
* groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added
groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save
Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the
danger icon only for large files * added the overflow for more files and file selection for all new files * moved the
interface to types * added the icon according to the source * set total page for wiki and youtube * h3 heading * merge
* updated the alert on the basis of total pages * deleted chunks * polling based on total pages * isNaN check * large
file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs
and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by:
abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya
<156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * create new
endpoint populate_graph_schema and update the query for getting labels from DB * Added main.py changes *
conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condition * removed llms * Fixed issue:
Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent
to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * markdown
rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue ---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by:
kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the
labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated
the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no
docs are retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed
env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith +
fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in
docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the
TIME_PER_PAGE env, which was causing a NaN issue in the approx processing time notification; fixed that
* Dev (#433) * DEV to STAGING (#461) * DEV to STAGING (#462) * added upload api * changed the dropzone error message *
Dev to staging (#466)
knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * in… * fix-load-existing-schema (#1061) * added bug report feature request and format fixes * configured dependenabot for python * configuration fix * Fixed the logging time issue * Backend connection config (#1060) * Make all usercredential params are non-required * Add uri in backend_config_api * backend api connection * Handle string null in source_api_list API * config changes * few more changes * Handle usercredential None while backend connection config * bloom url * schema_visulization api issue fixed * handle creds sending * code refactring for API's * console issue * chat-history --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Unable to get the status of document node resolved due to leading spaces (#1064) * Unable to get the status of document node resolved due to leading and trailing spaces * removed trailing leading spaces while creating source node * graph driver connection resolved * updated dependency * always show schema button * always show schema button * Update README.md * Update README.md * Update README.md * Update README.md * Fixed the create community issue for backend connection configuration * removal of unused code * Support added for gpt 3o mini & gemini flash 2.0 in dev (#1069) --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: kaustubh-darekar Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: Ikko Eltociear Ashimine Co-authored-by: Jayanth T Co-authored-by: Jayanth T Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Michael Hunger Co-authored-by: Kain Shu <44948284+Kain-90@users.noreply.github.com> Co-authored-by: destiny966113 <90891243+destiny966113@users.noreply.github.com> Co-authored-by: edenbuaa Co-authored-by: a-s-poorna Co-authored-by: Prakriti Solankey Co-authored-by: Marcos Cannabrava <54267712+marcoscannabrava@users.noreply.github.com> Co-authored-by: Nan An Co-authored-by: dhiaaeddine16 --- .github/ISSUE_TEMPLATE/bug-report..md | 25 ++ .github/ISSUE_TEMPLATE/feature-request.md | 25 ++ .github/dependabot.yml | 13 + README.md | 32 +- backend/example.env | 6 +- backend/requirements.txt | 39 +- backend/score.py | 179 ++++---- backend/src/communities.py | 5 + backend/src/create_chunks.py | 8 +- backend/src/diffbot_transformer.py | 2 +- backend/src/document_sources/local_file.py | 2 +- 
backend/src/document_sources/web_pages.py | 4 +- backend/src/graph_query.py | 31 +- backend/src/llm.py | 18 +- backend/src/main.py | 56 +-- backend/src/make_relationships.py | 3 +- backend/src/post_processing.py | 6 +- backend/src/shared/constants.py | 17 +- backend/test_integrationqa.py | 404 +++++++++--------- docker-compose.yml | 2 +- docs/backend/backend_docs.adoc | 53 +++ example.env | 3 + frontend/example.env | 3 + frontend/package.json | 6 +- frontend/src/API/Index.ts | 10 +- frontend/src/App.tsx | 1 + frontend/src/components/Auth/Auth.tsx | 1 + .../src/components/ChatBot/ChatInfoModal.tsx | 27 +- .../components/ChatBot/ChatOnlyComponent.tsx | 16 +- frontend/src/components/Content.tsx | 77 +++- frontend/src/components/FileTable.tsx | 26 +- .../components/Graph/GraphPropertiesTable.tsx | 37 +- .../src/components/Graph/GraphViewModal.tsx | 39 +- .../src/components/Layout/DrawerChatbot.tsx | 26 +- .../src/components/Layout/DrawerDropzone.tsx | 2 +- frontend/src/components/Layout/Header.tsx | 10 +- frontend/src/components/Layout/PageLayout.tsx | 220 ++++------ frontend/src/components/Login/Index.tsx | 9 +- .../ConnectionModal/ConnectionModal.tsx | 11 +- .../AdditionalInstructions/index.tsx | 189 ++++++-- .../EntityExtractionSetting.tsx | 21 +- .../Popups/GraphEnhancementDialog/index.tsx | 1 - .../src/components/UI/IconButtonToolTip.tsx | 3 + frontend/src/components/User/Profile.tsx | 2 +- frontend/src/context/UserCredentials.tsx | 21 +- frontend/src/context/UsersFiles.tsx | 34 +- frontend/src/services/CancelAPI.ts | 7 - frontend/src/services/ChunkEntitiesInfo.ts | 4 +- frontend/src/services/GetFiles.ts | 14 +- .../src/services/GetNodeLabelsRelTypes.ts | 2 +- frontend/src/services/GetOrphanNodes.ts | 2 +- frontend/src/services/GraphQuery.ts | 19 +- frontend/src/services/PollingAPI.ts | 31 +- frontend/src/services/QnaAPI.ts | 7 +- .../src/services/ServerSideStatusUpdateAPI.ts | 27 +- frontend/src/types.ts | 55 ++- frontend/src/utils/Constants.ts | 17 + frontend/src/utils/FileAPI.ts | 21 + frontend/yarn.lock | 248 +---------- 59 files changed, 1206 insertions(+), 973 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug-report..md create mode 100644 .github/ISSUE_TEMPLATE/feature-request.md create mode 100644 .github/dependabot.yml diff --git a/.github/ISSUE_TEMPLATE/bug-report..md b/.github/ISSUE_TEMPLATE/bug-report..md new file mode 100644 index 000000000..46a100d00 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report..md @@ -0,0 +1,25 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +## Guidelines +Please note that GitHub issues are only meant for bug reports/feature requests. + + +Before creating a new issue, please check whether someone else has raised the same issue. You may be able to add context to that issue instead of duplicating the report. However, each issue should also only be focussed on a _single_ problem, so do not describe new problems within an existing thread - these are very hard to track and manage, and your problem may be ignored. Finally, do not append comments to closed issues; if the same problem re-occurs, open a new issue, and include a link to the old one. + +To help us understand your issue, please specify important details, primarily: + +- LLM Graph builder version: X.Y.Z +- Neo4j Database version: X.Y.Z (Community/Enterprise/Aura). + +- **Steps to reproduce** +- Expected behavior +- Actual behavior + +Additionally, include (as appropriate) screenshots, drawings, etc. 
\ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 000000000..632385c5d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,25 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: feature +assignees: '' + +--- + +## Guidelines +Please note that GitHub issues are only meant for bug reports/feature requests. + +## Feature request template + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..bce3e3260 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +version: 2 +updates: + - package-ecosystem: 'npm' + directory: '/frontend' + schedule: + interval: 'weekly' + target-branch: 'dev' + + - package-ecosystem: 'pip' + directory: '/backend' + schedule: + interval: 'weekly' + target-branch: 'dev' \ No newline at end of file diff --git a/README.md b/README.md index 42b494ea3..2663a0863 100644 --- a/README.md +++ b/README.md @@ -36,10 +36,6 @@ According to enviornment we are configuring the models which is indicated by VIT EX: ```env VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" - -You can then run Docker Compose to build and start all components: -```bash -docker-compose up --build ``` #### Additional configs @@ -159,14 +155,14 @@ Allow unauthenticated request : Yes ## LLMs Supported 1. OpenAI 2. Gemini -3. Azure OpenAI(dev) -4. Anthropic(dev) -5. Fireworks(dev) -6. Groq(dev) -7. Amazon Bedrock(dev) -8. Ollama(dev) +3. Azure OpenAI(dev deployed version) +4. Anthropic(dev deployed version) +5. Fireworks(dev deployed version) +6. Groq(dev deployed version) +7. Amazon Bedrock(dev deployed version) +8. Ollama(dev deployed version) 9. Diffbot -10. Other OpenAI compabtile baseurl models(dev) +10. Other OpenAI compabtile baseurl models(dev deployed version) ## For local llms (Ollama) 1. Pull the docker imgage of ollama @@ -177,23 +173,27 @@ docker pull ollama/ollama ```bash docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama ``` -3. Execute any llm model ex🦙3 +3. Pull specific ollama model. +```bash +ollama pull llama3 +``` +4. Execute any llm model ex🦙3 ```bash docker exec -it ollama ollama run llama3 ``` -4. Configure env variable in docker compose or backend environment. +5. Configure env variable in docker compose. ```env LLM_MODEL_CONFIG_ollama_ #example LLM_MODEL_CONFIG_ollama_llama3=${LLM_MODEL_CONFIG_ollama_llama3-llama3, http://host.docker.internal:11434} ``` -5. Configure the backend API url +6. Configure the backend API url ```env VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-backendurl} ``` -6. Open the application in browser and select the ollama model for the extraction. -7. Enjoy Graph Building. +7. Open the application in browser and select the ollama model for the extraction. +8. Enjoy Graph Building. 
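As a rough sketch of how a `LLM_MODEL_CONFIG_ollama_*` value can be consumed on the backend side, assuming the comma-separated `model_name,model_local_url` convention shown above (the environment value here is illustrative, not taken from a real deployment):

```python
import os

# Illustrative value following the "model_name,model_local_url" convention
# used for LLM_MODEL_CONFIG_ollama_llama3 in backend/example.env.
os.environ["LLM_MODEL_CONFIG_ollama_llama3"] = "llama3,http://host.docker.internal:11434"

env_value = os.environ["LLM_MODEL_CONFIG_ollama_llama3"]
# Split into model name and base URL, stripping stray whitespace around the comma.
model_name, base_url = (part.strip() for part in env_value.split(","))

print(model_name)  # llama3
print(base_url)    # http://host.docker.internal:11434
```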
## Usage diff --git a/backend/example.env b/backend/example.env index 9400ad747..4a66791a9 100644 --- a/backend/example.env +++ b/backend/example.env @@ -31,8 +31,10 @@ DEFAULT_DIFFBOT_CHAT_MODEL="openai_gpt_4o" #whichever model specified here , ne LLM_MODEL_CONFIG_openai_gpt_3.5="gpt-3.5-turbo-0125,openai_api_key" LLM_MODEL_CONFIG_openai_gpt_4o_mini="gpt-4o-mini-2024-07-18,openai_api_key" LLM_MODEL_CONFIG_openai_gpt_4o="gpt-4o-2024-11-20,openai_api_key" +LLM_MODEL_CONFIG_openai-gpt-o3-mini="o3-mini-2025-01-31,openai_api_key" LLM_MODEL_CONFIG_gemini_1.5_pro="gemini-1.5-pro-002" LLM_MODEL_CONFIG_gemini_1.5_flash="gemini-1.5-flash-002" +LLM_MODEL_CONFIG_gemini_2.0_flash="gemini-2.0-flash-001" LLM_MODEL_CONFIG_diffbot="diffbot,diffbot_api_key" LLM_MODEL_CONFIG_azure_ai_gpt_35="azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version" LLM_MODEL_CONFIG_azure_ai_gpt_4o="gpt-4o,https://YOUR-ENDPOINT.openai.azure.com/,azure_api_key,api_version" @@ -44,8 +46,10 @@ LLM_MODEL_CONFIG_ollama_llama3="model_name,model_local_url" YOUTUBE_TRANSCRIPT_PROXY="https://user:pass@domain:port" EFFECTIVE_SEARCH_RATIO=5 GRAPH_CLEANUP_MODEL="openai_gpt_4o" -CHUNKS_TO_BE_CREATED="50" BEDROCK_EMBEDDING_MODEL="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.titan-embed-text-v1" LLM_MODEL_CONFIG_bedrock_nova_micro_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-micro-v1:0" LLM_MODEL_CONFIG_bedrock_nova_lite_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-lite-v1:0" LLM_MODEL_CONFIG_bedrock_nova_pro_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-pro-v1:0" +LLM_MODEL_CONFIG_fireworks_deepseek_r1="model_name,fireworks_api_key" #model_name="accounts/fireworks/models/deepseek-r1" +LLM_MODEL_CONFIG_fireworks_deepseek_v3="model_name,fireworks_api_key" #model_name="accounts/fireworks/models/deepseek-v3" +MAX_TOKEN_CHUNK_SIZE=2000 #Max token used to process/extract the file content. 
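A note on the new `MAX_TOKEN_CHUNK_SIZE` setting: as the `create_chunks.py` change later in this patch shows, the backend derives how many chunks to create for a file by dividing this cap by the per-request `token_chunk_size`. A minimal sketch of that arithmetic (values illustrative):

```python
import os

os.environ["MAX_TOKEN_CHUNK_SIZE"] = "2000"  # cap from the example.env entry above

token_chunk_size = 200  # illustrative per-chunk token size passed to /extract

# Mirrors the calculation in CreateChunksofDocument.split_file_into_chunks:
max_token_chunk_size = int(os.getenv("MAX_TOKEN_CHUNK_SIZE", 10000))
chunk_to_be_created = int(max_token_chunk_size / token_chunk_size)

print(chunk_to_be_created)  # 10 -> at most 10 chunks are created for the file
```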
\ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index ee6a49bff..9836a1cb7 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,33 +1,33 @@ asyncio==3.4.3 -boto3==1.35.90 -botocore==1.35.90 +boto3==1.36.2 +botocore==1.36.2 certifi==2024.8.30 fastapi==0.115.6 fastapi-health==0.4.0 -google-api-core==2.23.0 -google-auth==2.36.0 +google-api-core==2.24.0 +google-auth==2.37.0 google_auth_oauthlib==1.2.1 google-cloud-core==2.4.1 json-repair==0.30.2 pip-install==1.3.5 -langchain==0.3.13 -langchain-aws==0.2.10 -langchain-anthropic==0.3.0 -langchain-fireworks==0.2.5 -langchain-community==0.3.13 -langchain-core==0.3.28 +langchain==0.3.15 +langchain-aws==0.2.11 +langchain-anthropic==0.3.3 +langchain-fireworks==0.2.6 +langchain-community==0.3.15 +langchain-core==0.3.31 langchain-experimental==0.3.4 -langchain-google-vertexai==2.0.7 -langchain-groq==0.2.1 -langchain-openai==0.2.14 -langchain-text-splitters==0.3.4 +langchain-google-vertexai==2.0.11 +langchain-groq==0.2.3 +langchain-openai==0.3.1 +langchain-text-splitters==0.3.5 langchain-huggingface==0.1.2 langdetect==1.0.9 -langsmith==0.2.4 -langserve==0.3.0 +langsmith==0.2.11 +langserve==0.3.1 neo4j-rust-ext nltk==3.9.1 -openai==1.58.1 +openai==1.59.9 opencv-python==4.10.0.84 psutil==6.1.0 pydantic==2.9.2 @@ -56,7 +56,6 @@ google-cloud-logging==3.11.3 pypandoc==1.13 graphdatascience==1.12 Secweb==1.11.0 -ragas==0.2.6 +ragas==0.2.11 rouge_score==0.1.2 -langchain-neo4j==0.2.0 - +langchain-neo4j==0.3.0 diff --git a/backend/score.py b/backend/score.py index e309fa9fc..7e7ceff7b 100644 --- a/backend/score.py +++ b/backend/score.py @@ -11,14 +11,14 @@ from langchain_google_vertexai import ChatVertexAI from src.api_response import create_api_response from src.graphDB_dataAccess import graphDBdataAccess -from src.graph_query import get_graph_results,get_chunktext_results +from src.graph_query import get_graph_results,get_chunktext_results,visualize_schema from src.chunkid_entities import get_entities_from_chunkids from src.post_processing import create_vector_fulltext_indexes, create_entity_embedding, graph_schema_consolidation from sse_starlette.sse import EventSourceResponse from src.communities import create_communities from src.neighbours import get_neighbour_nodes import json -from typing import List +from typing import List, Optional from google.oauth2.credentials import Credentials import os from src.logger import CustomLogger @@ -82,7 +82,7 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send): app = FastAPI() app.add_middleware(XContentTypeOptions) app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) -app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext"]) +app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext","/schema_visualization"]) app.add_middleware( CORSMiddleware, allow_origins=["*"], @@ -101,11 +101,11 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send): @app.post("/url/scan") async def create_source_knowledge_graph_url( - uri=Form(), - userName=Form(), - 
password=Form(), + uri=Form(None), + userName=Form(None), + password=Form(None), source_url=Form(None), - database=Form(), + database=Form(None), aws_access_key_id=Form(None), aws_secret_access_key=Form(None), wiki_query=Form(None), @@ -115,7 +115,7 @@ async def create_source_knowledge_graph_url( source_type=Form(None), gcs_project_id=Form(None), access_token=Form(None), - email=Form() + email=Form(None) ): try: @@ -172,11 +172,11 @@ async def create_source_knowledge_graph_url( @app.post("/extract") async def extract_knowledge_graph_from_file( - uri=Form(), - userName=Form(), - password=Form(), + uri=Form(None), + userName=Form(None), + password=Form(None), model=Form(), - database=Form(), + database=Form(None), source_url=Form(None), aws_access_key_id=Form(None), aws_secret_access_key=Form(None), @@ -189,11 +189,14 @@ async def extract_knowledge_graph_from_file( file_name=Form(None), allowedNodes=Form(None), allowedRelationship=Form(None), + token_chunk_size: Optional[int] = Form(None), + chunk_overlap: Optional[int] = Form(None), + chunks_to_combine: Optional[int] = Form(None), language=Form(None), access_token=Form(None), retry_condition=Form(None), additional_instructions=Form(None), - email=Form() + email=Form(None) ): """ Calls 'extract_graph_from_file' in a new thread to create Neo4jGraph from a @@ -215,22 +218,22 @@ async def extract_knowledge_graph_from_file( graphDb_data_Access = graphDBdataAccess(graph) merged_file_path = os.path.join(MERGED_DIR,file_name) if source_type == 'local file': - uri_latency, result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions) elif source_type == 's3 bucket' and source_url: - uri_latency, result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions) elif source_type == 'web-url': - uri_latency, result = await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions) elif source_type == 'youtube' and source_url: - uri_latency, result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, 
additional_instructions) elif source_type == 'Wikipedia' and wiki_query: - uri_latency, result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions) elif source_type == 'gcs bucket' and gcs_bucket_name: - uri_latency, result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions) + uri_latency, result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions) else: return create_api_response('Failed',message='source_type is other than accepted source') extract_api_time = time.time() - start_time @@ -278,7 +281,7 @@ async def extract_knowledge_graph_from_file( failed_file_process(uri,file_name, merged_file_path, source_type) node_detail = graphDb_data_Access.get_current_status_document_node(file_name) # Set the status "Completed" in logging becuase we are treating these error already handled by application as like custom errors. - json_obj = {'api_name':'extract','message':error_message,'file_created_at':node_detail[0]['created_time'],'error_message':error_message, 'file_name': file_name,'status':'Completed', + json_obj = {'api_name':'extract','message':error_message,'file_created_at':formatted_time(node_detail[0]['created_time']),'error_message':error_message, 'file_name': file_name,'status':'Completed', 'db_url':uri, 'userName':userName, 'database':database,'success_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc)),'email':email} logger.log_struct(json_obj, "INFO") return create_api_response("Failed", message = error_message, error=error_message, file_name=file_name) @@ -289,7 +292,7 @@ async def extract_knowledge_graph_from_file( failed_file_process(uri,file_name, merged_file_path, source_type) node_detail = graphDb_data_Access.get_current_status_document_node(file_name) - json_obj = {'api_name':'extract','message':message,'file_created_at':node_detail[0]['created_time'],'error_message':error_message, 'file_name': file_name,'status':'Failed', + json_obj = {'api_name':'extract','message':message,'file_created_at':formatted_time(node_detail[0]['created_time']),'error_message':error_message, 'file_name': file_name,'status':'Failed', 'db_url':uri, 'userName':userName, 'database':database,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc)),'email':email} logger.log_struct(json_obj, "ERROR") return create_api_response('Failed', message=message + error_message[:100], error=error_message, file_name = file_name) @@ -297,13 +300,18 @@ async def extract_knowledge_graph_from_file( gc.collect() @app.get("/sources_list") -async def get_source_list(uri:str, userName:str, 
password:str, email:str, database:str=None): +async def get_source_list(uri:str=None, userName:str=None, password:str=None, email:str=None, database:str=None): """ Calls 'get_source_list_from_graph' which returns list of sources which already exist in databse """ try: start = time.time() - decoded_password = decode_password(password) + if password is not None and password != "null": + decoded_password = decode_password(password) + else: + decoded_password = None + userName = None + database = None if " " in uri: uri = uri.replace(" ","+") result = await asyncio.to_thread(get_source_list_from_graph,uri,userName,decoded_password,database) @@ -320,7 +328,7 @@ async def get_source_list(uri:str, userName:str, password:str, email:str, databa return create_api_response(job_status, message=message, error=error_message) @app.post("/post_processing") -async def post_processing(uri=Form(), userName=Form(), password=Form(), database=Form(), tasks=Form(None), email=Form()): +async def post_processing(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None), tasks=Form(None), email=Form(None)): try: graph = create_graph_database_connection(uri, userName, password, database) tasks = set(map(str.strip, json.loads(tasks))) @@ -377,7 +385,7 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database gc.collect() @app.post("/chat_bot") -async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), database=Form(),question=Form(None), document_names=Form(None),session_id=Form(None),mode=Form(None),email=Form()): +async def chat_bot(uri=Form(None),model=Form(None),userName=Form(None), password=Form(None), database=Form(None),question=Form(None), document_names=Form(None),session_id=Form(None),mode=Form(None),email=Form(None)): logging.info(f"QA_RAG called at {datetime.now()}") qa_rag_start_time = time.time() try: @@ -409,10 +417,10 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), gc.collect() @app.post("/chunk_entities") -async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), nodedetails=Form(None),entities=Form(),mode=Form(),email=Form()): +async def chunk_entities(uri=Form(None),userName=Form(None), password=Form(None), database=Form(None), nodedetails=Form(None),entities=Form(),mode=Form(),email=Form(None)): try: start = time.time() - result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,nodedetails=nodedetails,entities=entities,mode=mode) + result = await asyncio.to_thread(get_entities_from_chunkids,nodedetails=nodedetails,entities=entities,mode=mode,uri=uri, username=userName, password=password, database=database) end = time.time() elapsed_time = end - start json_obj = {'api_name':'chunk_entities','db_url':uri, 'userName':userName, 'database':database, 'nodedetails':nodedetails,'entities':entities, @@ -429,7 +437,7 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=F gc.collect() @app.post("/get_neighbours") -async def get_neighbours(uri=Form(),userName=Form(), password=Form(), database=Form(), elementId=Form(None),email=Form()): +async def get_neighbours(uri=Form(None),userName=Form(None), password=Form(None), database=Form(None), elementId=Form(None),email=Form(None)): try: start = time.time() result = await asyncio.to_thread(get_neighbour_nodes,uri=uri, username=userName, password=password,database=database, element_id=elementId) @@ -449,12 +457,12 @@ async def 
get_neighbours(uri=Form(),userName=Form(), password=Form(), database=F @app.post("/graph_query") async def graph_query( - uri: str = Form(), - database: str = Form(), - userName: str = Form(), - password: str = Form(), + uri: str = Form(None), + database: str = Form(None), + userName: str = Form(None), + password: str = Form(None), document_names: str = Form(None), - email=Form() + email=Form(None) ): try: start = time.time() @@ -482,7 +490,7 @@ async def graph_query( @app.post("/clear_chat_bot") -async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=Form(), session_id=Form(None),email=Form()): +async def clear_chat_bot(uri=Form(None),userName=Form(None), password=Form(None), database=Form(None), session_id=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -502,20 +510,18 @@ async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=F gc.collect() @app.post("/connect") -async def connect(uri=Form(), userName=Form(), password=Form(), database=Form(),email=Form()): +async def connect(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph, database) gcs_file_cache = os.environ.get('GCS_FILE_CACHE') - chunk_to_be_created = int(os.environ.get('CHUNKS_TO_BE_CREATED', '50')) end = time.time() elapsed_time = end - start json_obj = {'api_name':'connect','db_url':uri, 'userName':userName, 'database':database, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}','email':email} logger.log_struct(json_obj, "INFO") result['elapsed_api_time'] = f'{elapsed_time:.2f}' result['gcs_file_cache'] = gcs_file_cache - result['chunk_to_be_created']= chunk_to_be_created return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -526,8 +532,8 @@ async def connect(uri=Form(), userName=Form(), password=Form(), database=Form(), @app.post("/upload") async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber=Form(None), totalChunks=Form(None), - originalname=Form(None), model=Form(None), uri=Form(), userName=Form(), - password=Form(), database=Form(),email=Form()): + originalname=Form(None), model=Form(None), uri=Form(None), userName=Form(None), + password=Form(None), database=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -551,7 +557,7 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber gc.collect() @app.post("/schema") -async def get_structured_schema(uri=Form(), userName=Form(), password=Form(), database=Form(),email=Form()): +async def get_structured_schema(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -582,14 +588,20 @@ def encode_password(pwd): return encoded_pwd_bytes @app.get("/update_extract_status/{file_name}") -async def update_extract_status(request:Request, file_name, url, userName, password, database): +async def update_extract_status(request: Request, file_name: str, uri:str=None, userName:str=None, password:str=None, database:str=None): async def generate(): status = '' 
- decoded_password = decode_password(password) - uri = url - if " " in url: - uri= url.replace(" ","+") - graph = create_graph_database_connection(uri, userName, decoded_password, database) + + if password is not None and password != "null": + decoded_password = decode_password(password) + else: + decoded_password = None + + url = uri + if url and " " in url: + url= url.replace(" ","+") + + graph = create_graph_database_connection(url, userName, decoded_password, database) graphDb_data_Access = graphDBdataAccess(graph) while True: try: @@ -625,14 +637,14 @@ async def generate(): return EventSourceResponse(generate(),ping=60) @app.post("/delete_document_and_entities") -async def delete_document_and_entities(uri=Form(), - userName=Form(), - password=Form(), - database=Form(), +async def delete_document_and_entities(uri=Form(None), + userName=Form(None), + password=Form(None), + database=Form(None), filenames=Form(), source_types=Form(), deleteEntities=Form(), - email=Form()): + email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -695,7 +707,7 @@ async def get_document_status(file_name, url, userName, password, database): return create_api_response('Failed',message=message) @app.post("/cancelled_job") -async def cancelled_job(uri=Form(), userName=Form(), password=Form(), database=Form(), filenames=Form(None), source_types=Form(None),email=Form()): +async def cancelled_job(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None), filenames=Form(None), source_types=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -716,7 +728,7 @@ async def cancelled_job(uri=Form(), userName=Form(), password=Form(), database=F gc.collect() @app.post("/populate_graph_schema") -async def populate_graph_schema(input_text=Form(None), model=Form(None), is_schema_description_checked=Form(None),email=Form()): +async def populate_graph_schema(input_text=Form(None), model=Form(None), is_schema_description_checked=Form(None),email=Form(None)): try: start = time.time() result = populate_graph_schema_from_text(input_text, model, is_schema_description_checked) @@ -735,7 +747,7 @@ async def populate_graph_schema(input_text=Form(None), model=Form(None), is_sche gc.collect() @app.post("/get_unconnected_nodes_list") -async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form(), database=Form(),email=Form()): +async def get_unconnected_nodes_list(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -756,7 +768,7 @@ async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form( gc.collect() @app.post("/delete_unconnected_nodes") -async def delete_orphan_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(),unconnected_entities_list=Form(),email=Form()): +async def delete_orphan_nodes(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),unconnected_entities_list=Form(),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -777,7 +789,7 @@ async def delete_orphan_nodes(uri=Form(), userName=Form(), password=Form(), data gc.collect() @app.post("/get_duplicate_nodes") -async def get_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), 
database=Form(),email=Form()): +async def get_duplicate_nodes(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -798,7 +810,7 @@ async def get_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), data gc.collect() @app.post("/merge_duplicate_nodes") -async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(),duplicate_nodes_list=Form(),email=Form()): +async def merge_duplicate_nodes(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None),duplicate_nodes_list=Form(),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -820,7 +832,7 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da gc.collect() @app.post("/drop_create_vector_index") -async def drop_create_vector_index(uri=Form(), userName=Form(), password=Form(), database=Form(), isVectorIndexExist=Form(),email=Form()): +async def drop_create_vector_index(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None), isVectorIndexExist=Form(),email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -842,7 +854,7 @@ async def drop_create_vector_index(uri=Form(), userName=Form(), password=Form(), gc.collect() @app.post("/retry_processing") -async def retry_processing(uri=Form(), userName=Form(), password=Form(), database=Form(), file_name=Form(), retry_condition=Form(), email=Form()): +async def retry_processing(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None), file_name=Form(), retry_condition=Form(), email=Form(None)): try: start = time.time() graph = create_graph_database_connection(uri, userName, password, database) @@ -938,13 +950,13 @@ async def calculate_additional_metrics(question: str = Form(), @app.post("/fetch_chunktext") async def fetch_chunktext( - uri: str = Form(), - database: str = Form(), - userName: str = Form(), - password: str = Form(), + uri: str = Form(None), + database: str = Form(None), + userName: str = Form(None), + password: str = Form(None), document_name: str = Form(), page_no: int = Form(1), - email=Form() + email=Form(None) ): try: start = time.time() @@ -991,26 +1003,21 @@ async def backend_connection_configuration(): database= os.getenv('NEO4J_DATABASE') password= os.getenv('NEO4J_PASSWORD') gcs_file_cache = os.environ.get('GCS_FILE_CACHE') - chunk_to_be_created = int(os.environ.get('CHUNKS_TO_BE_CREATED', '50')) if all([uri, username, database, password]): graph = Neo4jGraph() logging.info(f'login connection status of object: {graph}') if graph is not None: graph_connection = True - encoded_password = encode_password(password) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.connection_check_and_get_vector_dimensions(database) - result["graph_connection"] = graph_connection - result["uri"] = uri - result["user_name"] = username - result["database"] = database - result["password"] = encoded_password result['gcs_file_cache'] = gcs_file_cache - result['chunk_to_be_created']= chunk_to_be_created + result['uri'] = uri end = time.time() elapsed_time = end - start result['api_name'] = 'backend_connection_configuration' result['elapsed_api_time'] = f'{elapsed_time:.2f}' + result['graph_connection'] = f'{graph_connection}' + result['connection_from'] = 'backendAPI'
logger.log_struct(result, "INFO") return create_api_response('Success',message=f"Backend connection successful",data=result) else: @@ -1025,6 +1032,32 @@ async def backend_connection_configuration(): return create_api_response(job_status, message=message, error=error_message.rstrip('.') + ', or fill from the login dialog.', data=graph_connection) finally: gc.collect() - + +@app.post("/schema_visualization") +async def get_schema_visualization(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None)): + try: + start = time.time() + result = await asyncio.to_thread(visualize_schema, + uri=uri, + userName=userName, + password=password, + database=database) + if result: + logging.info("Graph schema visualization query successful") + end = time.time() + elapsed_time = end - start + logging.info(f'Schema result from DB: {result}') + json_obj = {'api_name':'schema_visualization','db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") + return create_api_response('Success', data=result,message=f"Total elapsed API time {elapsed_time:.2f}") + except Exception as e: + message="Unable to get schema visualization from neo4j database" + error_message = str(e) + logging.info(message) + logging.exception(f'Exception:{error_message}') + return create_api_response("Failed", message=message, error=error_message) + finally: + gc.collect() + if __name__ == "__main__": uvicorn.run(app) \ No newline at end of file diff --git a/backend/src/communities.py b/backend/src/communities.py index a38b39696..0ecf493cc 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -193,6 +193,11 @@ def get_gds_driver(uri, username, password, database): try: + if all(v is None for v in [username, password]): + username= os.getenv('NEO4J_USERNAME') + database= os.getenv('NEO4J_DATABASE') + password= os.getenv('NEO4J_PASSWORD') + gds = GraphDataScience( endpoint=uri, auth=(username, password), diff --git a/backend/src/create_chunks.py b/backend/src/create_chunks.py index 0a1657568..523d2b77c 100644 --- a/backend/src/create_chunks.py +++ b/backend/src/create_chunks.py @@ -14,7 +14,7 @@ def __init__(self, pages: list[Document], graph: Neo4jGraph): self.pages = pages self.graph = graph - def split_file_into_chunks(self): + def split_file_into_chunks(self,token_chunk_size, chunk_overlap): """ Split a list of documents(file pages) into chunks of fixed size. @@ -25,8 +25,10 @@ def split_file_into_chunks(self): A list of chunks each of which is a langchain Document. 
""" logging.info("Split file into smaller chunks") - text_splitter = TokenTextSplitter(chunk_size=200, chunk_overlap=20) - chunk_to_be_created = int(os.environ.get('CHUNKS_TO_BE_CREATED', '50')) + text_splitter = TokenTextSplitter(chunk_size=token_chunk_size, chunk_overlap=chunk_overlap) + MAX_TOKEN_CHUNK_SIZE = int(os.getenv('MAX_TOKEN_CHUNK_SIZE', 10000)) + chunk_to_be_created = int(MAX_TOKEN_CHUNK_SIZE / token_chunk_size) + if 'page' in self.pages[0].metadata: chunks = [] for i, document in enumerate(self.pages): diff --git a/backend/src/diffbot_transformer.py b/backend/src/diffbot_transformer.py index 03e1ba69e..ccac42144 100644 --- a/backend/src/diffbot_transformer.py +++ b/backend/src/diffbot_transformer.py @@ -8,4 +8,4 @@ def get_graph_from_diffbot(graph,chunkId_chunkDoc_list:List): combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list) llm,model_name = get_llm('diffbot') graph_documents = llm.convert_to_graph_documents(combined_chunk_document_list) - return graph_documents \ No newline at end of file + return graph_documents diff --git a/backend/src/document_sources/local_file.py b/backend/src/document_sources/local_file.py index f674a202f..47e12ab48 100644 --- a/backend/src/document_sources/local_file.py +++ b/backend/src/document_sources/local_file.py @@ -62,4 +62,4 @@ def get_pages_with_page_numbers(unstructured_pages): 'filetype':page.metadata['filetype']} if page == unstructured_pages[-1]: pages.append(Document(page_content = page_content, metadata=metadata_with_custom_page_number)) - return pages \ No newline at end of file + return pages diff --git a/backend/src/document_sources/web_pages.py b/backend/src/document_sources/web_pages.py index 91c87510c..cdc0fb76a 100644 --- a/backend/src/document_sources/web_pages.py +++ b/backend/src/document_sources/web_pages.py @@ -6,11 +6,11 @@ def get_documents_from_web_page(source_url:str): try: pages = WebBaseLoader(source_url, verify_ssl=False).load() try: - file_name = pages[0].metadata['title'] + file_name = pages[0].metadata['title'].strip() if not file_name: file_name = last_url_segment(source_url) except: file_name = last_url_segment(source_url) return file_name, pages except Exception as e: - raise LLMGraphBuilderException(str(e)) \ No newline at end of file + raise LLMGraphBuilderException(str(e)) diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index aefaacbd1..f8054061f 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -3,7 +3,8 @@ from neo4j import GraphDatabase import os import json -from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY + +from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY,SCHEMA_VISUALIZATION_QUERY def get_graphDB_driver(uri, username, password,database="neo4j"): """ @@ -15,6 +16,11 @@ def get_graphDB_driver(uri, username, password,database="neo4j"): """ try: logging.info(f"Attempting to connect to the Neo4j database at {uri}") + if all(v is None for v in [username, password]): + username= os.getenv('NEO4J_USERNAME') + database= os.getenv('NEO4J_DATABASE') + password= os.getenv('NEO4J_PASSWORD') + enable_user_agent = os.environ.get("ENABLE_USER_AGENT", "False").lower() in ("true", "1", "yes") if enable_user_agent: driver = GraphDatabase.driver(uri, auth=(username, password),database=database, user_agent=os.environ.get('NEO4J_USER_AGENT')) @@ -228,7 +234,7 @@ def get_chunktext_results(uri, username, password, database, document_name, page offset = 10 
skip = (page_no - 1) * offset limit = offset - driver = GraphDatabase.driver(uri, auth=(username, password)) + driver = get_graphDB_driver(uri, username, password,database) with driver.session(database=database) as session: total_chunks_result = session.run(COUNT_CHUNKS_QUERY, file_name=document_name) total_chunks = total_chunks_result.single()["total_chunks"] @@ -252,4 +258,23 @@ def get_chunktext_results(uri, username, password, database, document_name, page raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e finally: if driver: - driver.close() \ No newline at end of file + driver.close() + + +def visualize_schema(uri, userName, password, database): + """Retrieves graph schema""" + driver = None + try: + logging.info("Starting visualizing graph schema") + driver = get_graphDB_driver(uri, userName, password,database) + records, summary, keys = driver.execute_query(SCHEMA_VISUALIZATION_QUERY) + nodes = records[0].get("nodes", []) + relationships = records[0].get("relationships", []) + result = {"nodes": nodes, "relationships": relationships} + return result + except Exception as e: + logging.error(f"An error occurred schema retrieval. Error: {str(e)}") + raise Exception(f"An error occurred schema retrieval. Error: {str(e)}") + finally: + if driver: + driver.close() diff --git a/backend/src/llm.py b/backend/src/llm.py index ab49534ac..c1a5f3eeb 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -47,11 +47,16 @@ def get_llm(model: str): ) elif "openai" in model: model_name, api_key = env_value.split(",") - llm = ChatOpenAI( + if "o3-mini" in model: + llm= ChatOpenAI( + api_key=api_key, + model=model_name) + else: + llm = ChatOpenAI( api_key=api_key, model=model_name, temperature=0, - ) + ) elif "azure" in model: model_name, api_endpoint, api_key, api_version = env_value.split(",") @@ -121,9 +126,7 @@ def get_llm(model: str): return llm, model_name -def get_combined_chunks(chunkId_chunkDoc_list): - chunks_to_combine = int(os.environ.get("NUMBER_OF_CHUNKS_TO_COMBINE")) - logging.info(f"Combining {chunks_to_combine} chunks before sending request to LLM") +def get_combined_chunks(chunkId_chunkDoc_list, chunks_to_combine): combined_chunk_document_list = [] combined_chunks_page_content = [ "".join( @@ -190,11 +193,10 @@ async def get_graph_document_list( graph_document_list = await llm_transformer.aconvert_to_graph_documents(combined_chunk_document_list) return graph_document_list - -async def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship, additional_instructions=None): +async def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship, chunks_to_combine, additional_instructions=None): try: llm, model_name = get_llm(model) - combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list) + combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list, chunks_to_combine) if allowedNodes is None or allowedNodes=="": allowedNodes =[] diff --git a/backend/src/main.py b/backend/src/main.py index d5d9a143f..4359ad51f 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -123,7 +123,7 @@ def create_source_node_graph_web_url(graph, model, source_url, source_type): message = f"Unable to read data for given url : {source_url}" raise LLMGraphBuilderException(message) try: - title = pages[0].metadata['title'] + title = pages[0].metadata['title'].strip() if not title: title = last_url_segment(source_url) language = pages[0].metadata['language'] @@ 
-221,7 +221,7 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type lst_file_name.append({'fileName':obj_source_node.file_name,'fileSize':obj_source_node.file_size,'url':obj_source_node.url, 'language':obj_source_node.language, 'status':'Success'}) return lst_file_name,success_count,failed_count -async def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): logging.info(f'Process file name :{fileName}') if not retry_condition: @@ -233,11 +233,11 @@ async def extract_graph_from_file_local_file(uri, userName, password, database, file_name, pages, file_extension = get_documents_from_file_by_path(merged_file_path,fileName) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'File content is not available for file : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, True, merged_file_path) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, True, merged_file_path) else: - return await processing_source(uri, userName, password, database, model, fileName, [], allowedNodes, allowedRelationship, True, merged_file_path, retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, fileName, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, True, merged_file_path, retry_condition, additional_instructions=additional_instructions) -async def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: if(aws_access_key_id==None or aws_secret_access_key==None): raise LLMGraphBuilderException('Please provide AWS access and secret keys') @@ -247,48 +247,48 @@ async def extract_graph_from_file_s3(uri, userName, password, database, model, s if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'File content is not available for file : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine) else: - return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, 
chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions) -async def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: file_name, pages = get_documents_from_web_page(source_url) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'Content is not available for given URL : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine) else: - return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions) -async def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: file_name, pages = get_documents_from_youtube(source_url) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'Youtube transcript is not available for file : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine) else: - return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions) -async def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: file_name, pages = get_documents_from_Wikipedia(wiki_query, language) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'Wikipedia page is 
not available for file : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine) else: - return await processing_source(uri, userName, password, database, model, file_name,[], allowedNodes, allowedRelationship, retry_condition=retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, file_name,[], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions) -async def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition, additional_instructions): +async def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: file_name, pages = get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'File content is not available for file : {file_name}') - return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine) else: - return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition, additional_instructions=additional_instructions) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions) -async def processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, is_uploaded_from_local=None, merged_file_path=None, retry_condition=None, additional_instructions=None): +async def processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, is_uploaded_from_local=None, merged_file_path=None, retry_condition=None, additional_instructions=None): """ Extracts a Neo4jGraph from a PDF file based on the model. 
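Note: the chunking knobs threaded through these entry points line up with the frontend defaults added later in this patch (VITE_TOKENS_PER_CHUNK=100, VITE_CHUNK_OVERLAP=20, VITE_CHUNK_TO_COMBINE=1). A minimal sketch of calling one of the updated entry points, using only the parameter names from the signatures above; the URI, credentials, and values are illustrative:

    import asyncio
    from src.main import extract_graph_from_web_page

    result = asyncio.run(
        extract_graph_from_web_page(
            uri="neo4j://localhost:7687",   # illustrative connection details
            userName="neo4j",
            password="password",
            database="neo4j",
            model="openai_gpt_4o",
            source_url="https://example.com/article",
            file_name="article",
            allowedNodes="",
            allowedRelationship="",
            token_chunk_size=100,   # tokens per chunk
            chunk_overlap=20,       # tokens shared between consecutive chunks
            chunks_to_combine=1,    # chunks merged per LLM extraction call
            retry_condition=None,
            additional_instructions=None,
        )
    )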
@@ -317,7 +317,7 @@ async def processing_source(uri, userName, password, database, model, file_name, graphDb_data_Access = graphDBdataAccess(graph) create_chunk_vector_index(graph) start_get_chunkId_chunkDoc_list = time.time() - total_chunks, chunkId_chunkDoc_list = get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition) + total_chunks, chunkId_chunkDoc_list = get_chunkId_chunkDoc_list(graph, file_name, pages, token_chunk_size, chunk_overlap, retry_condition) end_get_chunkId_chunkDoc_list = time.time() elapsed_get_chunkId_chunkDoc_list = end_get_chunkId_chunkDoc_list - start_get_chunkId_chunkDoc_list logging.info(f'Time taken to create list chunkids with chunk document: {elapsed_get_chunkId_chunkDoc_list:.2f} seconds') @@ -380,7 +380,7 @@ async def processing_source(uri, userName, password, database, model, file_name, break else: processing_chunks_start_time = time.time() - node_count,rel_count,latency_processed_chunk = await processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count, additional_instructions) + node_count,rel_count,latency_processed_chunk = await processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,chunks_to_combine,node_count, rel_count, additional_instructions) processing_chunks_end_time = time.time() processing_chunks_elapsed_end_time = processing_chunks_end_time - processing_chunks_start_time logging.info(f"Time taken {update_graph_chunk_processed} chunks processed upto {select_chunks_upto} completed in {processing_chunks_elapsed_end_time:.2f} seconds for file name {file_name}") @@ -457,7 +457,7 @@ async def processing_source(uri, userName, password, database, model, file_name, logging.error(error_message) raise LLMGraphBuilderException(error_message) -async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count, additional_instructions=None): +async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, chunks_to_combine, node_count, rel_count, additional_instructions=None): #create vector index and update chunk node with embedding latency_processing_chunk = {} if graph is not None: @@ -475,7 +475,7 @@ async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, logging.info("Get graph document list from models") start_entity_extraction = time.time() - graph_documents = await get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship, additional_instructions) + graph_documents = await get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship, chunks_to_combine, additional_instructions) end_entity_extraction = time.time() elapsed_entity_extraction = end_entity_extraction - start_entity_extraction logging.info(f'Time taken to extract enitities from LLM Graph Builder: {elapsed_entity_extraction:.2f} seconds') @@ -515,7 +515,7 @@ async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, rel_count += len(relations) return node_count,rel_count,latency_processing_chunk -def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): +def get_chunkId_chunkDoc_list(graph, file_name, pages, token_chunk_size, chunk_overlap, retry_condition): if not retry_condition: logging.info("Break down file into chunks") bad_chars = ['"', "\n", "'"] 
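Note: get_chunkId_chunkDoc_list now forwards token_chunk_size and chunk_overlap into CreateChunksofDocument.split_file_into_chunks (next hunk), but the splitter body itself is not part of this diff. A plausible sketch of the expected contract, assuming a token-based LangChain splitter underneath; treat it as a reading of the interface, not the shipped implementation:

    from langchain_text_splitters import TokenTextSplitter
    from langchain_core.documents import Document

    def split_file_into_chunks(pages: list[Document], token_chunk_size: int, chunk_overlap: int) -> list[Document]:
        # Token-based splitting keeps each chunk within the LLM's context budget;
        # the overlap preserves continuity across chunk boundaries.
        splitter = TokenTextSplitter(chunk_size=token_chunk_size, chunk_overlap=chunk_overlap)
        return splitter.split_documents(pages)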
@@ -528,7 +528,7 @@ def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): text = text.replace(j, '') pages[i]=Document(page_content=str(text), metadata=pages[i].metadata) create_chunks_obj = CreateChunksofDocument(pages, graph) - chunks = create_chunks_obj.split_file_into_chunks() + chunks = create_chunks_obj.split_file_into_chunks(token_chunk_size, chunk_overlap) chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks) return len(chunks), chunkId_chunkDoc_list @@ -673,11 +673,11 @@ def get_labels_and_relationtypes(graph): query = """ RETURN collect { CALL db.labels() yield label - WHERE NOT label IN ['Document','Chunk','_Bloom_Perspective_', '__Community__', '__Entity__'] + WHERE NOT label IN ['Document','Chunk','_Bloom_Perspective_', '__Community__', '__Entity__', 'Session', 'Message'] return label order by label limit 100 } as labels, collect { CALL db.relationshipTypes() yield relationshipType as type - WHERE NOT type IN ['PART_OF', 'NEXT_CHUNK', 'HAS_ENTITY', '_Bloom_Perspective_','FIRST_CHUNK','SIMILAR','IN_COMMUNITY','PARENT_COMMUNITY'] + WHERE NOT type IN ['PART_OF', 'NEXT_CHUNK', 'HAS_ENTITY', '_Bloom_Perspective_','FIRST_CHUNK','SIMILAR','IN_COMMUNITY','PARENT_COMMUNITY', 'NEXT', 'LAST_MESSAGE'] return type order by type LIMIT 100 } as relationshipTypes """ graphDb_data_Access = graphDBdataAccess(graph) diff --git a/backend/src/make_relationships.py b/backend/src/make_relationships.py index 333f0c550..bccfa1ddd 100644 --- a/backend/src/make_relationships.py +++ b/backend/src/make_relationships.py @@ -161,7 +161,8 @@ def create_chunk_vector_index(graph): graph=graph, node_label="Chunk", embedding_node_property="embedding", - index_name="vector" + index_name="vector", + embedding_dimension=EMBEDDING_DIMENSION ) vector_store.create_new_index() logging.info(f"Index created successfully. Time taken: {time.time() - start_time:.2f} seconds") diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index ccc3437f5..cdc8b06d3 100644 --- a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -3,6 +3,7 @@ import time from langchain_neo4j import Neo4jGraph import os +from src.graph_query import get_graphDB_driver from src.shared.common_fn import load_embedding_model from langchain_core.output_parsers import JsonOutputParser from langchain_core.prompts import ChatPromptTemplate @@ -137,7 +138,7 @@ def create_vector_fulltext_indexes(uri, username, password, database): logging.info("Starting the process of creating full-text indexes.") try: - driver = GraphDatabase.driver(uri, auth=(username, password), database=database) + driver = get_graphDB_driver(uri, username, password,database) driver.verify_connectivity() logging.info("Database connectivity verified.") except Exception as e: @@ -233,6 +234,3 @@ def graph_schema_consolidation(graph): graph.query(query) return None - - - \ No newline at end of file diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index eeb603245..b85654c8c 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -1,4 +1,3 @@ - OPENAI_MODELS = ["openai-gpt-3.5", "openai-gpt-4o", "openai-gpt-4o-mini"] GEMINI_MODELS = ["gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash"] GROQ_MODELS = ["groq-llama3"] @@ -893,3 +892,19 @@ types such as dates, numbers, revenues, and other non-entity information are not extracted as separate nodes. 
Instead, treat these as properties associated with the relevant entities.""" +SCHEMA_VISUALIZATION_QUERY = """ +CALL db.schema.visualization() YIELD nodes, relationships +RETURN + [n IN nodes | { + element_id: elementId(n), + labels: labels(n), + properties: apoc.any.properties(n) + }] AS nodes, + [r IN relationships | { + type: type(r), + properties: apoc.any.properties(r), + element_id: elementId(r), + start_node_element_id: elementId(startNode(r)), + end_node_element_id: elementId(endNode(r)) + }] AS relationships; +""" diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 03d0d470b..2d7a5c5e5 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -6,28 +6,24 @@ import pandas as pd from datetime import datetime as dt from dotenv import load_dotenv -# from score import * from src.main import * from src.QA_integration import QA_RAG -from langserve import add_routes from src.ragas_eval import get_ragas_metrics from datasets import Dataset -from ragas import evaluate -# from ragas.metrics import answer_relevancy, context_utilization, faithfulness -# from ragas.dataset_schema import SingleTurnSample -# Load environment variables if needed +# Load environment variables load_dotenv() -import os URI = os.getenv('NEO4J_URI') USERNAME = os.getenv('NEO4J_USERNAME') PASSWORD = os.getenv('NEO4J_PASSWORD') DATABASE = os.getenv('NEO4J_DATABASE') - -CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") -MERGED_DIR = os.path.join(os.path.dirname(__file__), "merged_files") - -# Initialize database connection -graph = create_graph_database_connection(URI,USERNAME,PASSWORD,DATABASE) +# Logging configuration +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +# Directory Paths +BASE_DIR = os.path.dirname(__file__) +CHUNK_DIR = os.path.join(BASE_DIR, "chunks") +MERGED_DIR = os.path.join(BASE_DIR, "merged_files") +# Initialize Neo4j connection +graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) def create_source_node_local(graph, model, file_name): """Creates a source node for a local file.""" @@ -42,151 +38,130 @@ def create_source_node_local(graph, model, file_name): graphDB_data_Access.create_source_node(source_node) return source_node -def delete_extracted_files(file_path): - """Delete the extracted files once extraction process is completed""" - try: - if os.path.exists(file_path): - os.remove(file_path) - logging.info(f"Deleted file:{file_path}") - else: - logging.warning(f"File not found for deletion: {file_path}") - except Exception as e: - logging.error(f"Failed to delete file {file_path}. Error: {e}") def test_graph_from_file_local(model_name): - """Test graph creation from a local file.""" - try: - file_name = 'About Amazon.pdf' - shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf',os.path.join(MERGED_DIR, file_name)) - create_source_node_local(graph, model_name, file_name) - merged_file_path = os.path.join(MERGED_DIR, file_name) - local_file_result = asyncio.run(extract_graph_from_file_local_file(URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '',None)) - logging.info("Local file processing complete") - print(local_file_result) - return local_file_result - except Exception as e: - logging.error(f"Failed to delete file. 
Error: {e}") - -# try: -# assert local_file_result['status'] == 'Completed' -# assert local_file_result['nodeCount'] > 0 -# assert local_file_result['relationshipCount'] > 0 -# print("Success") -# except AssertionError as e: -# print("Fail: ", e) - - # Delete the file after processing -# delete_extracted_fiKles(merged_file_path) - - #return local_file_result + """Tests graph creation from a local file.""" + try: + file_name = 'About Amazon.pdf' + merged_file_path = os.path.join(MERGED_DIR, file_name) + shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf', merged_file_path) + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + create_source_node_local(graph, model_name, file_name) + result = asyncio.run( + extract_graph_from_file_local_file( + URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '', None, '' + ) + ) + logging.info(f"Local file test result: {result}") + return result + except Exception as e: + logging.error(f"Error in test_graph_from_file_local: {e}") + return {"status": "Failed", "error": str(e)} def test_graph_from_wikipedia(model_name): - try: - """Test graph creation from a Wikipedia page.""" + """Tests graph creation from a Wikipedia page.""" + try: wiki_query = 'https://en.wikipedia.org/wiki/Apollo_program' - source_type = 'Wikipedia' - file_name = "Apollo_program" - create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) - - wiki_result = asyncio.run(extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, wiki_query, 'en',file_name, '', '',None)) - logging.info("Wikipedia test done") - print(wiki_result) - # try: - # assert wiki_result['status'] == 'Completed' - # assert wiki_result['nodeCount'] > 0 - # assert wiki_result['relationshipCount'] > 0 - # print("Success") - # except AssertionError as e: - # print("Fail: ", e) - - return wiki_result - except Exception as ex: - print('Hello error herte') - print(ex) - -def test_graph_website(model_name): - """Test graph creation from a Website page.""" - #graph, model, source_url, source_type - source_url = 'https://www.cloudskillsboost.google/' - source_type = 'web-url' - file_name = 'Google Cloud Skills Boost' - # file_name = [] - create_source_node_graph_web_url(graph, model_name, source_url, source_type) - - weburl_result = asyncio.run(extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url,file_name, '', '',None)) - logging.info("WebUrl test done") - print(weburl_result) - - # try: - # assert weburl_result['status'] == 'Completed' - # assert weburl_result['nodeCount'] > 0 - # assert weburl_result['relationshipCount'] > 0 - # print("Success") - # except AssertionError as e: - # print("Fail: ", e) - return weburl_result - + file_name = 'Apollo_program' + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, "Wikipedia") + result = asyncio.run( + extract_graph_from_file_Wikipedia( + URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 'en', file_name, '', '', None, '' + ) + ) + logging.info(f"Wikipedia test result: {result}") + return result + except Exception as e: + logging.error(f"Error in test_graph_from_wikipedia: {e}") + return {"status": "Failed", "error": str(e)} + def test_graph_from_youtube_video(model_name): - """Test graph creation from a YouTube video.""" - source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' - file_name = 'NKc8Tr5_L3w' - 
source_type = 'youtube' - create_source_node_graph_url_youtube(graph, model_name, source_url, source_type) - youtube_result = asyncio.run(extract_graph_from_file_youtube(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url,file_name,'','',None)) - logging.info("YouTube Video test done") - print(youtube_result) - -# try: -# assert youtube_result['status'] == 'Completed' -# assert youtube_result['nodeCount'] > 1 -# assert youtube_result['relationshipCount'] > 1 -# print("Success") -# except AssertionError as e: -# print("Failed: ", e) - - return youtube_result + """Tests graph creation from a YouTube video.""" + try: + source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' + file_name = 'NKc8Tr5_L3w' + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + create_source_node_graph_url_youtube(graph, model_name, source_url, "youtube") + result = asyncio.run( + extract_graph_from_file_youtube( + URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, file_name, '', '', None, '' + ) + ) + logging.info(f"YouTube video test result: {result}") + if isinstance(result, dict) and result.get("status") == "Failed": + return {"status": "Failed", "error": result.get("error", "Unknown error")} + return result + except Exception as e: + logging.error(f"Error in test_graph_from_youtube_video: {e}") + return {"status": "Failed", "error": str(e)} +def test_graph_website(model_name): + """Tests graph creation from a Website page.""" + try: + source_url = 'https://www.cloudskillsboost.google/' + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + create_source_node_graph_web_url(graph, model_name, source_url, "web-url") + result = asyncio.run( + extract_graph_from_web_page( + URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, "Google Cloud Skills Boost", '', '', None, '' + ) + ) + logging.info(f"Web URL test result: {result}") + if isinstance(result, dict) and result.get("status") == "Failed": + return {"status": "Failed", "error": result.get("error", "Unknown error")} + return result + except Exception as e: + logging.error(f"Error in test_graph_website: {e}") + return {"status": "Failed", "error": str(e)} + def test_chatbot_qna(model_name, mode='vector'): - """Test chatbot QnA functionality for different modes.""" - QA_n_RAG = QA_RAG(graph, model_name, 'Tell me about amazon', '[]', 1, mode) - print(QA_n_RAG) - print(len(QA_n_RAG['message'])) + """Tests chatbot QnA functionality.""" + try: + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + result = QA_RAG(graph, model_name, 'Tell me about Amazon', '[]', 1, mode) + # assert len(result['message']) > 20 + logging.info(f"Chatbot QnA test passed for mode: {mode}") + return result + except Exception as e: + logging.error(f"Error in chatbot QnA: {e}") + return {"status": "Failed", "error": str(e)} +def get_disconnected_nodes(): + """Fetches list of disconnected nodes.""" + try: + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + graphDb_data_Access = graphDBdataAccess(graph) + nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() + if not nodes_list: + return None,"No records found" + return nodes_list[0]["e"]["elementId"], "Records loaded successfully" if total_nodes['total'] > 0 else "No records found" + except Exception as e: + logging.error(f"Error in get_disconnected_nodes: {e}") + return None, "Error fetching nodes" +def delete_disconnected_nodes(lst_element_id): + """Deletes disconnected nodes from the graph.""" try: - 
assert len(QA_n_RAG['message']) > 20 - return QA_n_RAG - print("Success") - except AssertionError as e: - print("Failed ", e) - return QA_n_RAG - -#Get Test disconnected_nodes list -def disconected_nodes(): - #graph = create_graph_database_connection(uri, userName, password, database) - graphDb_data_Access = graphDBdataAccess(graph) - nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() - print(nodes_list[0]["e"]["elementId"]) - status = "False" - if total_nodes['total']>0: - status = "get_unconnected_nodes_list.. records loaded successfully" - else: - status = "get_unconnected_nodes_list ..records not loaded" - return nodes_list[0]["e"]["elementId"], status - -#Test Delete delete_disconnected_nodes list -def delete_disconected_nodes(lst_element_id): - print(f'disconnect elementid list {lst_element_id}') - #graph = create_graph_database_connection(uri, userName, password, database) - graphDb_data_Access = graphDBdataAccess(graph) - result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) - print(f'delete disconnect api result {result}') - if not result: - return "delete_unconnected_nodes..Succesfully deleted first index of disconnected nodes" - else: - return "delete_unconnected_nodes..Unable to delete Nodes" + graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + graphDb_data_Access = graphDBdataAccess(graph) + result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) + return "Successfully deleted disconnected nodes" if not result else "Failed to delete nodes" + except Exception as e: + logging.error(f"Error in delete_disconnected_nodes: {e}") + return "Error in deletion" -#Test Get Duplicate_nodes +def test_populate_graph_schema_from_text(model_name): + """Tests schema population from text.""" + try: + schema_text = "Amazon was founded on July 5, 1994, by Jeff Bezos in Bellevue, Washington." + result_schema = populate_graph_schema_from_text(schema_text, model_name, True) + logging.info(f"Schema test result: {result_schema}") + return result_schema + except Exception as e: + logging.error(f"Error in populate_graph_schema_from_text: {e}") + return {"status": "Failed", "error": str(e)} + def get_duplicate_nodes(): #graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) @@ -195,66 +170,95 @@ def get_duplicate_nodes(): return "Data successfully loaded" else: return "Unable to load data" - -#Test populate_graph_schema -def test_populate_graph_schema_from_text(model_name): - schema_text =('Amazon was founded on July 5, 1994, by Jeff Bezos in Bellevue, Washington.The company originally started as an online marketplace for books but gradually expanded its offerings to include a wide range of product categories. 
This diversification led to it being referred.') - #result_schema='' - try: - result_schema = populate_graph_schema_from_text(schema_text, model_name, True) - print(result_schema) - return result_schema - except Exception as e: - print("Failed to get schema from text", e) - return e - + def run_tests(): - final_list = [] - error_list = [] - - models = ['openai_gpt_4','openai_gpt_4o','openai_gpt_4o_mini','gemini_1.5_pro','gemini_1.5_flash'] - + """Runs all integration tests and logs results.""" + extract_list = [] + extract_error_list = [] + chatbot_list = [] + chatbot_error_list = [] + other_api_list = [] + models = ['openai_gpt_4','openai_gpt_4o','openai_gpt_4o_mini','gemini_1.5_pro','gemini_1.5_flash','gemini_2.0_flash','bedrock_nova_micro_v1','bedrock_nova_lite_v1','bedrock_nova_pro_v1','fireworks_qwen72b_instruct'] + chatbot_modes = [ + "vector", + "graph+vector", + "fulltext", + "graph+vector+fulltext", + "entity search+vector" + ] for model_name in models: + logging.info(f"Starting tests for model: {model_name}") + # Run each test independently to capture all errors + for test_func, test_args in [ + (test_graph_from_file_local, [model_name]), + (test_graph_from_wikipedia, [model_name]), + (test_graph_from_youtube_video,[model_name]), + (test_graph_website,[model_name]), + ]: + try: + result = test_func(*test_args) + if isinstance(result, dict) and result.get("status") == "Failed": + extract_error_list.append((model_name, test_func.__name__, result.get("error", "Unknown error"))) + else: + extract_list.append(result) + except Exception as e: + logging.error(f"Error in {test_func.__name__} for {model_name}: {e}") + extract_error_list.append((model_name, test_func.__name__, str(e))) + # Run all chatbot QnA modes + for mode in chatbot_modes: + try: + result = test_chatbot_qna(model_name,mode=mode) + if isinstance(result, dict) and result.get("status") == "Failed": + chatbot_error_list.append((model_name, f"test_chatbot_qna ({mode})", result.get("error", "Unknown error"))) + else: + chatbot_list.append(result) + except Exception as e: + logging.error(f"Error in test_chatbot_qna ({mode}) for {model_name}: {e}") + chatbot_error_list.append((model_name, f"test_chatbot_qna ({mode})", str(e))) + try: - final_list.append(test_graph_from_file_local(model_name)) - final_list.append(test_graph_from_wikipedia(model_name)) - final_list.append(test_graph_website(model_name)) - final_list.append(test_populate_graph_schema_from_text(model_name)) - final_list.append(test_graph_from_youtube_video(model_name)) - final_list.append(test_chatbot_qna(model_name)) - final_list.append(test_chatbot_qna(model_name, mode='vector')) - final_list.append(test_chatbot_qna(model_name, mode='graph+vector')) - final_list.append(test_chatbot_qna(model_name, mode='fulltext')) - final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) - final_list.append(test_chatbot_qna(model_name, mode='entity search+vector')) - + schema_result = test_populate_graph_schema_from_text(model_name) + logging.info(f"Schema result for {model_name}: {schema_result}") + other_api_list.append({f"{model_name}":schema_result}) except Exception as e: - error_list.append((model_name, str(e))) + logging.error(f"Error in test_populate_graph_schema_from_text for {model_name}: {e}") + other_api_list.append({f"{model_name}":str(e)}) + # Handle disconnected nodes separately + try: + dis_elementid, dis_status = get_disconnected_nodes() + delete_status = delete_disconnected_nodes([dis_elementid]) if dis_elementid else "No
disconnected nodes found" + except Exception as e: + dis_status, delete_status = "Error fetching nodes", "Error deleting nodes" + logging.error(f"Error handling disconnected nodes: {e}") -# test_populate_graph_schema_from_text('openai-gpt-4o') -#delete diconnected nodes - dis_elementid, dis_status = disconected_nodes() - lst_element_id = [dis_elementid] - delt = delete_disconected_nodes(lst_element_id) - dup = get_duplicate_nodes() - print(final_list) - schma = test_populate_graph_schema_from_text(model_name) - # Save final results to CSV - df = pd.DataFrame(final_list) - print(df) - df['execution_date'] = dt.today().strftime('%Y-%m-%d') -#diconnected nodes - df['disconnected_nodes']=dis_status - df['get_duplicate_nodes']=dup + try: + dup = get_duplicate_nodes() + except Exception as e: + dup = "Error getting duplicate nodes" + logging.error(f"Error getting duplicate nodes: {e}") + # Convert results to DataFrame + df_extract = pd.DataFrame(extract_list) + df_extract['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_extract.to_csv(f"test_results/Extract_Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) - df['delete_disconected_nodes']=delt - df['test_populate_graph_schema_from_text'] = schma - df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + df_chatbot = pd.DataFrame(chatbot_list) + df_chatbot['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_chatbot.to_csv(f"test_results/chatbot_Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) - # Save error details to CSV - df_errors = pd.DataFrame(error_list, columns=['Model', 'Error']) - df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') - df_errors.to_csv(f"Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + other_api_dict = {'disconnected_nodes':dis_status,'delete_disconnected_nodes' : delete_status,'get_duplicate_nodes':dup,'test_populate_graph_schema_from_text':other_api_list} + with open(f"test_results/other_api_results_{dt.now().strftime('%Y%m%d_%H%M%S')}.txt", "w") as file: + file.write(json.dumps(other_api_dict, indent=4)) + # Save errors + if extract_error_list: + df_errors = pd.DataFrame(extract_error_list, columns=['Model', 'Function', 'Error']) + df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_errors.to_csv(f"test_results/Extract_Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + if chatbot_error_list: + df_errors = pd.DataFrame(chatbot_error_list, columns=['Model', 'Function', 'Error']) + df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_errors.to_csv(f"test_results/chatbot_Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + logging.info("All tests completed.") if __name__ == "__main__": - run_tests() \ No newline at end of file + run_tests() diff --git a/docker-compose.yml b/docker-compose.yml index 6f1b4174e..d3fe710f1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -65,7 +65,7 @@ services: - VITE_LLM_MODELS_PROD=${VITE_LLM_MODELS_PROD-openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash} - VITE_AUTH0_DOMAIN=${VITE_AUTH0_DOMAIN-} - VITE_AUTH0_CLIENT_ID=${VITE_AUTH0_CLIENT_ID-} - - VITE_SKIP_AUTH=${VITE_SKIP_AUTH-true} + - VITE_SKIP_AUTH=$VITE_SKIP_AUTH-true} - DEPLOYMENT_ENV=local volumes: - ./frontend:/app diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index 058f88a58..d82f12870 100644 --- a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -1075,3 
+1075,56 @@ The API responsible for create the connection obj from Neo4j DB based on environ "error": "Could not connect to Neo4j database. Please ensure that the username and password are correct", "message": "Unable to connect backend DB" } +.... + +=== Visualize graph DB schema +---- +POST /schema_visualization +---- + +Users can visualize the database schema through this API. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name + + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "nodes": [ + { + "element_id": "-5374", + "labels": [ + "Entity" + ], + "properties": { + "name": "Entity", + "indexes": [ + "id,description" + ], + "constraints": [] + } + }, + ], + "relationships": [ + { + "element_id": "-44223", + "end_node_element_id": "-5411", + "start_node_element_id": "-5342", + "properties": { + "name": "OWNED" + }, + "type": "OWNED" + }, + ] + }, + "message": "Total elapsed API time 3.51" +} +.... diff --git a/example.env b/example.env index 5d3a598c9..71c9dd36b 100644 --- a/example.env +++ b/example.env @@ -27,6 +27,9 @@ VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web" VITE_ENV="DEV" VITE_TIME_PER_PAGE=50 VITE_CHUNK_SIZE=5242880 +VITE_CHUNK_OVERLAP=20 +VITE_TOKENS_PER_CHUNK=100 +VITE_CHUNK_TO_COMBINE=1 VITE_GOOGLE_CLIENT_ID="" VITE_CHAT_MODES="" VITE_BATCH_SIZE=2 diff --git a/frontend/example.env b/frontend/example.env index f96efd207..1576bbea0 100644 --- a/frontend/example.env +++ b/frontend/example.env @@ -5,6 +5,9 @@ VITE_LLM_MODELS="diffbot,openai_gpt_3.5,openai_gpt_4o" VITE_ENV="DEV" VITE_TIME_PER_PAGE=50 VITE_CHUNK_SIZE=5242880 +VITE_CHUNK_OVERLAP=20 +VITE_TOKENS_PER_CHUNK=100 +VITE_CHUNK_TO_COMBINE=1 VITE_LARGE_FILE_SIZE=5242880 VITE_GOOGLE_CLIENT_ID="" VITE_CHAT_MODES="" diff --git a/frontend/package.json b/frontend/package.json index baedaee34..cc23f21f6 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -16,8 +16,8 @@ "@mui/material": "^5.15.10", "@mui/styled-engine": "^5.15.9", "@neo4j-devtools/word-color": "^0.0.8", - "@neo4j-ndl/base": "^3.0.16", - "@neo4j-ndl/react": "^3.0.30", + "@neo4j-ndl/base": "^3.2.9", + "@neo4j-ndl/react": "^3.2.18", "@neo4j-nvl/base": "^0.3.6", "@neo4j-nvl/react": "^0.3.6", "@react-oauth/google": "^0.12.1", @@ -34,8 +34,6 @@ "react-markdown": "^9.0.1", "react-router": "^6.23.1", "react-router-dom": "^6.23.1", - "remark-gfm": "^4.0.0", - "tailwind-merge": "^2.3.0", "uuid": "^9.0.1" }, "devDependencies": { diff --git a/frontend/src/API/Index.ts b/frontend/src/API/Index.ts index b89924221..d3144379e 100644 --- a/frontend/src/API/Index.ts +++ b/frontend/src/API/Index.ts @@ -9,11 +9,11 @@ const api = axios.create({ export const createDefaultFormData = (userCredentials: UserCredentials) => { const formData = new FormData(); - formData.append('uri', userCredentials?.uri ?? ''); - formData.append('database', userCredentials?.database ?? ''); - formData.append('userName', userCredentials?.userName ?? ''); - formData.append('password', userCredentials?.password ?? ''); - formData.append('email', userCredentials?.email ??
''); + if (userCredentials?.uri) formData.append('uri', userCredentials?.uri); + if (userCredentials?.database) formData.append('database', userCredentials?.database); + if (userCredentials?.userName) formData.append('userName', userCredentials?.userName); + if (userCredentials?.password) formData.append('password', userCredentials?.password); + if (userCredentials?.email) formData.append('email', userCredentials?.email); api.interceptors.request.use( (config) => { if (config.data instanceof FormData) { diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 6a7ce0948..f975e5c52 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -8,6 +8,7 @@ const App = () => { return ( : }> + }> }> ); diff --git a/frontend/src/components/Auth/Auth.tsx b/frontend/src/components/Auth/Auth.tsx index a9fdc568b..30849d4da 100644 --- a/frontend/src/components/Auth/Auth.tsx +++ b/frontend/src/components/Auth/Auth.tsx @@ -7,6 +7,7 @@ const Auth0ProviderWithHistory: React.FC<{ children: React.ReactNode }> = ({ chi const navigate = useNavigate(); function onRedirectCallback(appState?: AppState) { + localStorage.removeItem('isReadOnlyMode'); navigate(appState?.returnTo || window.location.pathname, { state: appState }); } diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 779b591f9..d14c48be6 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -18,7 +18,6 @@ import { ExtendedNode, chatInfoMessage } from '../../types'; import { useEffect, useMemo, useReducer, useRef, useState } from 'react'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; -import { useCredentials } from '../../context/UserCredentials'; import { tokens } from '@neo4j-ndl/base'; import ChunkInfo from './ChunkInfo'; import EntitiesInfo from './EntitiesInfo'; @@ -82,12 +81,11 @@ const ChatInfoModal: React.FC = ({ error?.length ? 10 : mode === chatModeLables['global search+vector+fulltext'] - ? 7 - : mode === chatModeLables.graph - ? 4 - : 3 + ? 7 + : mode === chatModeLables.graph + ? 4 + : 3 ); - const { userCredentials } = useCredentials(); const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); @@ -99,15 +97,15 @@ const ChatInfoModal: React.FC = ({ multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length > 4 ? true : multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length <= 4 - ? false - : null + ? false + : null ); const [isAdditionalMetricsWithSingleMode, setIsAdditionalMetricsWithSingleMode] = useState( metricDetails != undefined && Object.keys(metricDetails).length > 3 ? true : metricDetails != undefined && Object.keys(metricDetails).length <= 3 - ? false - : null + ? false + : null ); const actions: React.ComponentProps>[] = useMemo( @@ -142,10 +140,9 @@ const ChatInfoModal: React.FC = ({ toggleInfoLoading(); try { const response = await chunkEntitiesAPI( - userCredentials?.database, nodeDetails, entities_ids, - mode + mode, ); if (response.data.status === 'Failure') { throw new Error(response.data.error); @@ -355,9 +352,9 @@ const ChatInfoModal: React.FC = ({ {mode != chatModeLables.graph ? Sources used : <>} {mode != chatModeLables.graph ? 
Chunks : <>} {mode === chatModeLables['graph+vector'] || - mode === chatModeLables.graph || - mode === chatModeLables['graph+vector+fulltext'] || - mode === chatModeLables['entity search+vector'] ? ( + mode === chatModeLables.graph || + mode === chatModeLables['graph+vector+fulltext'] || + mode === chatModeLables['entity search+vector'] ? ( Top Entities used ) : ( <> diff --git a/frontend/src/components/ChatBot/ChatOnlyComponent.tsx b/frontend/src/components/ChatBot/ChatOnlyComponent.tsx index 7a3236a97..33ee19bd9 100644 --- a/frontend/src/components/ChatBot/ChatOnlyComponent.tsx +++ b/frontend/src/components/ChatBot/ChatOnlyComponent.tsx @@ -34,9 +34,17 @@ const ChatContent: React.FC = ({ chatMessages }) => { const port = urlParams.get('port'); const email = urlParams.get('email'); const openModal = urlParams.get('open') === 'true'; + const connectionStatus = urlParams.get('connectionStatus') === 'true'; if (openModal || !(uri && user && encodedPassword && database && port)) { - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - } else { + if (connectionStatus) { + setShowBackButton(); + setConnectionStatus(connectionStatus); + setMessages(chatMessages); + } else { + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + } + } + else { const credentialsForAPI: UserCredentials = { uri, userName: user, @@ -75,9 +83,9 @@ const ChatContent: React.FC = ({ chatMessages }) => { try { setClearHistoryData(true); setIsDeleteChatLoading(true); - const credentials = JSON.parse(localStorage.getItem('neo4j.connection') || '{}') as UserCredentials; + // const credentials = JSON.parse(localStorage.getItem('neo4j.connection') || '{}') as UserCredentials; const sessionId = sessionStorage.getItem('session_id') || ''; - const response = await clearChatAPI(credentials, sessionId); + const response = await clearChatAPI(sessionId); setIsDeleteChatLoading(false); if (response.data.status !== 'Success') { setClearHistoryData(false); diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 9d5f348f6..93c9e42ab 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -17,6 +17,9 @@ import { llms, RETRY_OPIONS, tooltips, + tokenchunkSize, + chunkOverlap, + chunksToCombine, } from '../utils/Constants'; import ButtonWithToolTip from './UI/ButtonWithToolTip'; import DropdownComponent from './Dropdown'; @@ -37,6 +40,7 @@ import { getChunkText } from '../services/getChunkText'; import ChunkPopUp from './Popups/ChunkPopUp'; import { isExpired, isFileReadyToProcess } from '../utils/Utils'; import { useHasSelections } from '../hooks/useHasSelections'; +import { Hierarchy1Icon } from '@neo4j-ndl/react/icons'; const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -81,9 +85,15 @@ const Content: React.FC = ({ setModel, selectedNodes, selectedRels, + selectedTokenChunkSize, + selectedChunk_overlap, + selectedChunks_to_combine, setSelectedNodes, setRowSelection, setSelectedRels, + setSelectedTokenChunkSize, + setSelectedChunk_overlap, + setSelectedChunks_to_combine, postProcessingTasks, queue, processedCount, @@ -93,7 +103,7 @@ const Content: React.FC = ({ additionalInstructions, setAdditionalInstructions, } = useFileContext(); - const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView' | 'neighborView'>( + const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView' | 'neighborView'|'showSchemaView'>( 'tableView' ); const 
[showDeletePopUp, setShowDeletePopUp] = useState(false); @@ -143,10 +153,10 @@ const Content: React.FC = ({ ? postProcessingTasks.filter((task) => task !== 'graph_schema_consolidation') : postProcessingTasks : hasSelections - ? postProcessingTasks.filter( + ? postProcessingTasks.filter( (task) => task !== 'graph_schema_consolidation' && task !== 'enable_communities' ) - : postProcessingTasks.filter((task) => task !== 'enable_communities'); + : postProcessingTasks.filter((task) => task !== 'enable_communities'); if (payload.length) { const response = await postProcessing(payload); if (response.data.status === 'Success') { @@ -263,10 +273,7 @@ const Content: React.FC = ({ const { name } = fileItem; triggerStatusUpdateAPI( name as string, - userCredentials?.uri, - userCredentials?.userName, - userCredentials?.password, - userCredentials?.database, + userCredentials, updateStatusForLargeFiles ); } @@ -283,6 +290,9 @@ const Content: React.FC = ({ fileItem.gcsBucketFolder ?? '', selectedNodes.map((l) => l.value), selectedRels.map((t) => t.value), + selectedTokenChunkSize, + selectedChunk_overlap, + selectedChunks_to_combine, fileItem.googleProjectId, fileItem.language, fileItem.accessToken, @@ -531,13 +541,15 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; - const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' - }`; - const encodedURL = encodeURIComponent(connectURL); - const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); - window.open(replacedUrl, '_blank'); - }; + let finalUrl = bloomUrl; + if (userCredentials?.database && userCredentials.uri && userCredentials.userName) { + const uriCoded = userCredentials.uri.replace(/:\d+$/, ''); + const connectURL = `${uriCoded.split('//')[0]}//${userCredentials.userName}@${uriCoded.split('//')[1]}:${userCredentials.port ?? '7687'}`; + const encodedURL = encodeURIComponent(connectURL); + finalUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); + } + window.open(finalUrl, '_blank'); + }; const handleGraphView = () => { setOpenGraphView(true); @@ -554,6 +566,12 @@ const Content: React.FC = ({ setUserCredentials({ uri: '', password: '', userName: '', database: '', email: '' }); setSelectedNodes([]); setSelectedRels([]); + localStorage.removeItem('selectedTokenChunkSize'); + setSelectedTokenChunkSize(tokenchunkSize); + localStorage.removeItem('selectedChunk_overlap'); + setSelectedChunk_overlap(chunkOverlap); + localStorage.removeItem('selectedChunks_to_combine'); + setSelectedChunks_to_combine(chunksToCombine); localStorage.removeItem('instructions'); setAdditionalInstructions(''); setMessages([ @@ -593,12 +611,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? { - ...f, - status: 'Ready to Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - nodesCount: isStartFromBegining ? 0 : f.nodesCount, - relationshipsCount: isStartFromBegining ? 0 : f.relationshipsCount, - } + ...f, + status: 'Ready to Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + nodesCount: isStartFromBegining ? 0 : f.nodesCount, + relationshipsCount: isStartFromBegining ? 
0 : f.relationshipsCount, + } : f; }); }); @@ -702,7 +720,8 @@ const Content: React.FC = ({ const selectedRows = childRef.current?.getSelectedRows(); if (selectedRows?.length) { const expiredFilesExists = selectedRows.some( - (c) => isFileReadyToProcess(c, true) && isExpired((c?.createdAt as Date) ?? new Date())); + (c) => isFileReadyToProcess(c, true) && isExpired((c?.createdAt as Date) ?? new Date()) + ); const largeFileExists = selectedRows.some( (c) => isFileReadyToProcess(c, true) && typeof c.size === 'number' && c.size > largeFileSize ); @@ -755,6 +774,11 @@ const Content: React.FC = ({ }); }, []); + const handleSchemaView = () => { + setOpenGraphView(true); + setViewPoint('showSchemaView'); + }; + return ( <> = ({
{!hasSelections ? : }
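Note: the handleSchemaView handler added above drives the new schema view, which is served by the POST /schema_visualization endpoint documented in the backend_docs.adoc hunk earlier in this patch. A quick manual check of that endpoint; the base URL is illustrative, and the form fields mirror the documented API parameters:

    import requests

    resp = requests.post(
        "http://localhost:8000/schema_visualization",  # illustrative backend base URL
        data={
            "uri": "neo4j://localhost:7687",
            "userName": "neo4j",
            "password": "password",
            "database": "neo4j",
        },
    )
    print(resp.json()["data"]["nodes"])  # node descriptors per the documented response shape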
@@ -977,6 +1001,15 @@ const Content: React.FC = ({ > {buttonCaptions.showPreviewGraph} {selectedfileslength && completedfileNo ? `(${completedfileNo})` : ''} + + + = (props, re const { connectionStatus, setConnectionStatus, onInspect, onRetry, onChunkView } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } = useFileContext(); - const { userCredentials, isReadOnlyUser, chunksToBeProces } = useCredentials(); + const { userCredentials, isReadOnlyUser } = useCredentials(); const columnHelper = createColumnHelper(); const [columnFilters, setColumnFilters] = useState([]); const [isLoading, setIsLoading] = useState(false); @@ -86,7 +89,7 @@ const FileTable: ForwardRefRenderFunction = (props, re const { colorMode } = useContext(ThemeWrapperContext); const [copyRow, setCopyRow] = useState(false); const islargeDesktop = useMediaQuery(`(min-width:1440px )`); - + const { pathname } = useLocation(); const tableRef = useRef(null); const { updateStatusForLargeFiles } = useServerSideEvent( @@ -682,10 +685,7 @@ const FileTable: ForwardRefRenderFunction = (props, re const handleSmallFile = (item: SourceNode, userCredentials: UserCredentials) => { subscribe( item.fileName, - userCredentials?.uri, - userCredentials?.userName, - userCredentials?.database, - userCredentials?.password, + userCredentials, updatestatus, updateProgress ).catch(handleFileUploadError); @@ -694,10 +694,7 @@ const FileTable: ForwardRefRenderFunction = (props, re const handleLargeFile = (item: SourceNode, userCredentials: UserCredentials) => { triggerStatusUpdateAPI( item.fileName, - userCredentials.uri, - userCredentials.userName, - userCredentials.password, - userCredentials.database, + userCredentials, updateStatusForLargeFiles ); }; @@ -999,6 +996,13 @@ const FileTable: ForwardRefRenderFunction = (props, re <> {filesData ? ( <> + {filesData.length === 0 && pathname === '/readonly' && ( + + + + + + )} = (props, re - {`Large files may be partially processed up to ${chunksToBeProces} chunks due to resource limits.`} + {`Large files may be partially processed up to 10K characters due to resource limits.`} diff --git a/frontend/src/components/Graph/GraphPropertiesTable.tsx b/frontend/src/components/Graph/GraphPropertiesTable.tsx index fcabb0103..b4508b283 100644 --- a/frontend/src/components/Graph/GraphPropertiesTable.tsx +++ b/frontend/src/components/Graph/GraphPropertiesTable.tsx @@ -2,6 +2,7 @@ import { GraphLabel, Typography } from '@neo4j-ndl/react'; import { GraphPropertiesTableProps } from '../../types'; const GraphPropertiesTable = ({ propertiesWithTypes }: GraphPropertiesTableProps): JSX.Element => { return (
@@ -10,24 +11,26 @@ const GraphPropertiesTable = ({ propertiesWithTypes }: GraphPropertiesTableProps Value
- {propertiesWithTypes.map(({ key, value }, _) => { - return ( -
-
- - {key} - + {propertiesWithTypes + .filter(({ value }) => value !== undefined && value !== null && value !== '' && !Array.isArray(value)) + .map(({ key, value }, _) => { + return ( +
+
+ + {key} + +
+
{value}
-
{value}
-
- ); - })} + ); + })}
); }; diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 3ea256be7..05d1f1897 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -24,7 +24,7 @@ import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { filterData, getCheckboxConditions, graphTypeFromNodes, processGraphData } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; -import { graphQueryAPI } from '../../services/GraphQuery'; +import { getGraphSchema, graphQueryAPI } from '../../services/GraphQuery'; import { graphLabels, nvlOptions, queryMap } from '../../utils/Constants'; import CheckboxSelection from './CheckboxSelection'; @@ -110,13 +110,17 @@ const GraphViewModal: React.FunctionComponent = ({ const fetchData = useCallback(async () => { try { - const nodeRelationshipData = - viewPoint === graphLabels.showGraphView - ? await graphQueryAPI( - graphQuery, - selectedRows?.map((f) => f.name) - ) - : await graphQueryAPI(graphQuery, [inspectedName ?? '']); + let nodeRelationshipData; + if (viewPoint === graphLabels.showGraphView) { + nodeRelationshipData = await graphQueryAPI( + graphQuery, + selectedRows?.map((f) => f.name) + ); + } else if (viewPoint === graphLabels.showSchemaView) { + nodeRelationshipData = await getGraphSchema(); + } else { + nodeRelationshipData = await graphQueryAPI(graphQuery, [inspectedName ?? '']); + } return nodeRelationshipData; } catch (error: any) { console.log(error); @@ -254,6 +258,8 @@ const GraphViewModal: React.FunctionComponent = ({ const headerTitle = viewPoint === graphLabels.showGraphView || viewPoint === graphLabels.chatInfoView ? graphLabels.generateGraph + : viewPoint === graphLabels.showSchemaView + ? graphLabels.renderSchemaGraph : `${graphLabels.inspectGeneratedGraphFrom} ${inspectedName}`; const checkBoxView = viewPoint !== graphLabels.chatInfoView; @@ -349,14 +355,15 @@ const GraphViewModal: React.FunctionComponent = ({ > {headerTitle} - {viewPoint !== graphLabels.chatInfoView && ( -
- - - - {graphLabels.chunksInfo} -
- )} + {viewPoint !== graphLabels.chatInfoView && + (viewPoint !== graphLabels.showSchemaView && (
+ + + + {graphLabels.chunksInfo} +
+ ))} {checkBoxView && ( = ({ isExpanded, clearHistoryD const location = useLocation(); useEffect(() => { - if (location && location.state && Array.isArray(location.state)) { - setMessages(location.state); - } else if (location && location.state && Object.prototype.toString.call(location.state) === '[object Object]') { - setUserCredentials(location.state.credential); - setIsGCSActive(location.state.isGCSActive); - setGdsActive(location.state.isgdsActive); - setIsReadOnlyUser(location.state.isReadOnlyUser); + // const localStorageData = localStorage.getItem('neo4j.connection'); + // const connectionLocal = JSON.parse(localStorageData ?? ''); + // if (connectionStatus && (connectionLocal.uri === userCredentials?.uri)) { + if (connectionStatus) { + if (location && location.state && Array.isArray(location.state)) { + setMessages(location.state); + } else if ( + location && + location.state && + typeof location.state === 'object' && + Object.keys(location.state).length > 1 + ) { + setUserCredentials(location.state.credential); + setIsGCSActive(location.state.isGCSActive); + setGdsActive(location.state.isgdsActive); + setIsReadOnlyUser(location.state.isReadOnlyUser); + } } - }, [location]); + }, [location, connectionStatus]); const getIsLoading = (messages: Messages[]) => { return messages.length > 1 ? messages.some((msg) => msg.isTyping || msg.isLoading) : false; diff --git a/frontend/src/components/Layout/DrawerDropzone.tsx b/frontend/src/components/Layout/DrawerDropzone.tsx index 7396ff786..a00af742f 100644 --- a/frontend/src/components/Layout/DrawerDropzone.tsx +++ b/frontend/src/components/Layout/DrawerDropzone.tsx @@ -72,7 +72,7 @@ const DrawerDropzone: React.FC = ({ )} {APP_SOURCES.includes('local') && ( -
+
)} diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index a57571612..a622e43ba 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -47,21 +47,21 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti const isLoading = getIsLoading(messages); if (session) { const neo4jConnection = JSON.parse(session); - const { uri } = neo4jConnection; - const userName = neo4jConnection.user; - const { password } = neo4jConnection; - const { database } = neo4jConnection; + const { uri, userName, password, database } = neo4jConnection; const [, port] = uri.split(':'); const encodedPassword = btoa(password); const chatUrl = `/chat-only?uri=${encodeURIComponent( uri )}&user=${userName}&password=${encodedPassword}&database=${database}&port=${port}&connectionStatus=${connectionStatus}`; navigate(chatUrl, { state: { messages, isLoading } }); + } else if (connectionStatus) { + const chatUrl = `/chat-only?connectionStatus=${connectionStatus}`; + navigate(chatUrl, { state: { messages, isLoading } }); } else { const chatUrl = `/chat-only?openModal=true`; window.open(chatUrl, '_blank'); } - }, [messages]); + }, [messages, connectionStatus, navigate]); const onBackButtonClick = () => { navigate('/', { state: messages }); diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index f92ddc77c..0978a51b5 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -1,11 +1,11 @@ -import { lazy, Suspense, useEffect, useReducer, useState } from 'react'; +import { lazy, Suspense, useEffect, useMemo, useReducer, useState } from 'react'; import SideNav from './SideNav'; import DrawerDropzone from './DrawerDropzone'; import DrawerChatbot from './DrawerChatbot'; import Content from '../Content'; import { clearChatAPI } from '../../services/QnaAPI'; import { useCredentials } from '../../context/UserCredentials'; -import { connectionState, UserCredentials } from '../../types'; +import { connectionState } from '../../types'; import { useMessageContext } from '../../context/UserMessages'; import { useMediaQuery } from '@mui/material'; import { useFileContext } from '../../context/UsersFiles'; @@ -16,8 +16,12 @@ import { envConnectionAPI } from '../../services/ConnectAPI'; import { healthStatus } from '../../services/HealthStatus'; import { useNavigate } from 'react-router'; import { useAuth0 } from '@auth0/auth0-react'; +import { showErrorToast } from '../../utils/toasts'; +import { APP_SOURCES } from '../../utils/Constants'; import { createDefaultFormData } from '../../API/Index'; - +const GCSModal = lazy(() => import('../DataSources/GCS/GCSModal')); +const S3Modal = lazy(() => import('../DataSources/AWS/S3Modal')); +const GenericModal = lazy(() => import('../WebSources/GenericSourceModal')); const ConnectionModal = lazy(() => import('../Popups/ConnectionModal/ConnectionModal')); const PageLayout: React.FC = () => { @@ -28,14 +32,28 @@ const PageLayout: React.FC = () => { chunksExistsWithDifferentDimension: false, }); const isLargeDesktop = useMediaQuery(`(min-width:1440px )`); - const { userCredentials, connectionStatus, setIsReadOnlyUser } = useCredentials(); + const { + connectionStatus, + setIsReadOnlyUser, + setConnectionStatus, + setGdsActive, + setIsBackendConnected, + setUserCredentials, + setErrorMessage, + setShowDisconnectButton, + showDisconnectButton, + setIsGCSActive, + // setChunksToBeProces, + 
} = useCredentials(); const [isLeftExpanded, setIsLeftExpanded] = useState(Boolean(isLargeDesktop)); const [isRightExpanded, setIsRightExpanded] = useState(Boolean(isLargeDesktop)); const [showChatBot, setShowChatBot] = useState(false); const [showDrawerChatbot, setShowDrawerChatbot] = useState(true); const [showEnhancementDialog, toggleEnhancementDialog] = useReducer((s) => !s, false); const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); - const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); + const [showGCSModal, toggleGCSModal] = useReducer((s) => { + return !s; + }, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); const { user, isAuthenticated } = useAuth0(); @@ -54,25 +72,24 @@ const PageLayout: React.FC = () => { setIsRightExpanded(false); } }; - + const isYoutubeOnly = useMemo( + () => APP_SOURCES.includes('youtube') && !APP_SOURCES.includes('wiki') && !APP_SOURCES.includes('web'), + [] + ); + const isWikipediaOnly = useMemo( + () => APP_SOURCES.includes('wiki') && !APP_SOURCES.includes('youtube') && !APP_SOURCES.includes('web'), + [] + ); + const isWebOnly = useMemo( + () => APP_SOURCES.includes('web') && !APP_SOURCES.includes('youtube') && !APP_SOURCES.includes('wiki'), + [] + ); const { messages, setClearHistoryData, clearHistoryData, setMessages, setIsDeleteChatLoading } = useMessageContext(); const { setShowTextFromSchemaDialog, showTextFromSchemaDialog } = useFileContext(); - const { - setConnectionStatus, - setGdsActive, - setIsBackendConnected, - setUserCredentials, - setErrorMessage, - setShowDisconnectButton, - showDisconnectButton, - setIsGCSActive, - setChunksToBeProces, - } = useCredentials(); const { cancel } = useSpeechSynthesis(); useEffect(() => { async function initializeConnection() { - const session = localStorage.getItem('neo4j.connection'); // Fetch backend health status try { const response = await healthStatus(); @@ -85,128 +102,54 @@ const PageLayout: React.FC = () => { setShowDisconnectButton(isModalOpen); localStorage.setItem('disconnectButtonState', isModalOpen ? 'true' : 'false'); }; - const setUserCredentialsLocally = (credentials: any) => { - setUserCredentials(credentials); - createDefaultFormData(credentials); - setIsGCSActive(credentials.isGCSActive ?? 
false); - setGdsActive(credentials.isgdsActive); - setIsReadOnlyUser(credentials.isReadonlyUser); - setChunksToBeProces(credentials.chunksTobeProcess); - localStorage.setItem( - 'neo4j.connection', - JSON.stringify({ - uri: credentials.uri, - user: credentials.userName, - password: btoa(credentials.password), - database: credentials.database, - userDbVectorIndex: 384, - isReadOnlyUser: credentials.isReadonlyUser, - isgdsActive: credentials.isgdsActive, - isGCSActive: credentials.isGCSActive, - chunksTobeProcess: credentials.chunksTobeProcess, - email: credentials.email, - }) - ); - }; - const parseSessionAndSetCredentials = (neo4jConnection: string) => { - if (!neo4jConnection) { - console.error('Invalid session data:', neo4jConnection); - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - return; - } - try { - const parsedConnection = JSON.parse(neo4jConnection); - if (parsedConnection.uri && parsedConnection.user && parsedConnection.password && parsedConnection.database) { - const credentials = { - uri: parsedConnection.uri, - userName: parsedConnection.user, - password: atob(parsedConnection.password), - database: parsedConnection.database, - email: parsedConnection.email, - }; - setUserCredentials(credentials); - createDefaultFormData(credentials); - setGdsActive(parsedConnection.isgdsActive); - setIsReadOnlyUser(parsedConnection.isReadOnlyUser); - setIsGCSActive(parsedConnection.isGCSActive); - } else { - console.error('Invalid parsed session data:', parsedConnection); - } - } catch (error) { - console.error('Failed to parse session data:', error); - } - }; - // To update credentials if environment values differ - const updateSessionIfNeeded = (envCredentials: any, storedSession: string) => { - try { - const storedCredentials = JSON.parse(storedSession); - const isDiffCreds = - envCredentials.uri !== storedCredentials.uri || - envCredentials.userName !== storedCredentials.user || - btoa(envCredentials.password) !== storedCredentials.password || - envCredentials.database !== storedCredentials.database; - if (isDiffCreds) { - setUserCredentialsLocally(envCredentials); - setClearHistoryData(true); - return true; - } - return false; - } catch (error) { - console.error('Failed to update session:', error); - return false; - } - }; - // Handle connection initialization - let backendApiResponse; try { - backendApiResponse = await envConnectionAPI(); + const backendApiResponse = await envConnectionAPI(); const connectionData = backendApiResponse.data; if (connectionData.data && connectionData.status === 'Success') { - const envCredentials = { + const credentials = { uri: connectionData.data.uri, - password: atob(connectionData.data.password), - userName: connectionData.data.user_name, - database: connectionData.data.database, isReadonlyUser: !connectionData.data.write_access, isgdsActive: connectionData.data.gds_status, - isGCSActive: connectionData?.data?.gcs_file_cache === 'True', + isGCSActive: connectionData.data.gcs_file_cache === 'True', chunksTobeProcess: parseInt(connectionData.data.chunk_to_be_created), email: user?.email ?? 
'', + connection: 'backendApi', }; - setChunksToBeProces(envCredentials.chunksTobeProcess); - setIsGCSActive(envCredentials.isGCSActive); - if (session) { - const updated = updateSessionIfNeeded(envCredentials, session); - if (!updated) { - parseSessionAndSetCredentials(session); - } - setConnectionStatus(Boolean(connectionData.data.graph_connection)); - setIsBackendConnected(true); - } else { - setUserCredentialsLocally(envCredentials); - setConnectionStatus(true); - } + //setChunksToBeProces(credentials.chunksTobeProcess); + setIsGCSActive(credentials.isGCSActive); + setUserCredentials(credentials); + createDefaultFormData({ uri: credentials.uri, email: credentials.email ?? '' }); + setGdsActive(credentials.isgdsActive); + setConnectionStatus(Boolean(connectionData.data.graph_connection)); + setIsReadOnlyUser(connectionData.data.isReadonlyUser); handleDisconnectButtonState(false); - } else { - if (session) { - parseSessionAndSetCredentials(session); - setConnectionStatus(true); + } + else if (!connectionData.data && connectionData.status === 'Success') { + const storedCredentials = localStorage.getItem('neo4j.connection'); + if (storedCredentials) { + const credentials = JSON.parse(storedCredentials); + setUserCredentials({ ...credentials, password: atob(credentials.password) }); + createDefaultFormData({ ...credentials, password: atob(credentials.password) }); + //setChunksToBeProces(credentials.chunksTobeProcess); + setIsGCSActive(credentials.isGCSActive); + setGdsActive(credentials.isgdsActive); + setConnectionStatus(Boolean(credentials.connection === 'connectAPI')); + setIsReadOnlyUser(credentials.isReadonlyUser); + handleDisconnectButtonState(true); } else { - setErrorMessage(backendApiResponse?.data?.error); setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + handleDisconnectButtonState(true); - handleDisconnectButtonState(true); - } - } catch (error) { - console.error('Error during backend API call:', error); - if (session) { - parseSessionAndSetCredentials(session); - setConnectionStatus(true); } else { setErrorMessage(backendApiResponse?.data?.error); setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + handleDisconnectButtonState(true); + console.log('env connection API returned an error'); + } + } catch (error) { + if (error instanceof Error) { + showErrorToast(error.message); } - handleDisconnectButtonState(true); } } initializeConnection(); @@ -218,7 +161,6 @@ const PageLayout: React.FC = () => { setIsDeleteChatLoading(true); cancel(); const response = await clearChatAPI( - userCredentials as UserCredentials, sessionStorage.getItem('session_id') ?? '' ); setIsDeleteChatLoading(false); @@ -274,9 +216,8 @@ const PageLayout: React.FC = () => { > {isLargeDesktop ? (
{
) : ( <> - - + }> + + + }> + + + }> + +
-
+
+
- - Turn unstructured information into to rich insightful Knowledge Graph + It seems like you haven't ingested any data yet. To begin building your knowledge graph, you'll need to log + in to the main application.
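Taken together, the PageLayout.tsx and ConnectionModal.tsx hunks in this patch converge on a single session-persistence scheme: the active connection is written to the 'neo4j.connection' localStorage key with a btoa-obfuscated password plus a connection marker ('connectAPI' for manual logins, 'backendApi' for environment-provided credentials), and on reload PageLayout restores that entry only when the env connection API returns no credentials of its own. The following is a minimal TypeScript sketch of that save/restore pair, assuming a simplified Session shape (the real object also carries flags such as isgdsActive, isGCSActive, and isReadonlyUser); the helper names are illustrative, not part of the patch:

interface Session {
  uri: string;
  userName: string;
  password: string;
  database: string;
  connection: 'connectAPI' | 'backendApi';
}

function saveSession(session: Session): void {
  // btoa only obfuscates the password for storage; it is not encryption.
  localStorage.setItem(
    'neo4j.connection',
    JSON.stringify({ ...session, password: btoa(session.password) })
  );
}

function restoreSession(): Session | null {
  const stored = localStorage.getItem('neo4j.connection');
  if (!stored) {
    return null; // nothing persisted: the caller opens the connection modal
  }
  try {
    const parsed = JSON.parse(stored) as Session;
    // Decode the password on the way out, mirroring the atob calls above.
    return { ...parsed, password: atob(parsed.password) };
  } catch {
    return null; // corrupt entry: fall back to the modal as well
  }
}

Note that the stored field is now userName rather than user, which is what lets Header.tsx destructure { uri, userName, password, database } from the parsed session in a single statement.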
diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 6e73da3a4..173da1e2d 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -52,7 +52,7 @@ export default function ConnectionModal({ errorMessage, setIsGCSActive, setShowDisconnectButton, - setChunksToBeProces, + // setChunksToBeProces, } = useCredentials(); const [isLoading, setIsLoading] = useState(false); const [searchParams, setSearchParams] = useSearchParams(); @@ -96,8 +96,8 @@ export default function ConnectionModal({ 'neo4j.connection', JSON.stringify({ uri: usercredential?.uri, - user: usercredential?.userName, - password: btoa(usercredential?.password), + userName: usercredential?.userName, + password: btoa(usercredential.password ?? ''), database: usercredential?.database, userDbVectorIndex: 384, }) @@ -241,12 +241,12 @@ export default function ConnectionModal({ setIsGCSActive(isGCSActive); setGdsActive(isgdsActive); setIsReadOnlyUser(isReadOnlyUser); - setChunksToBeProces(chunksTobeProcess); + // setChunksToBeProces(chunksTobeProcess); localStorage.setItem( 'neo4j.connection', JSON.stringify({ uri: connectionURI, - user: username, + userName: username, password: btoa(password), database: database, userDbVectorIndex, @@ -255,6 +255,7 @@ export default function ConnectionModal({ isGCSActive, chunksTobeProcess, email: user?.email ?? '', + connection:'connectAPI', }) ); setUserDbVectorIndex(response.data.data.db_vector_dimension); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/AdditionalInstructions/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/AdditionalInstructions/index.tsx index 89cbe5d5b..bec693dd8 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/AdditionalInstructions/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/AdditionalInstructions/index.tsx @@ -1,10 +1,19 @@ -import { Flex, TextArea, Typography, useMediaQuery } from '@neo4j-ndl/react'; -import { buttonCaptions } from '../../../../utils/Constants'; +import { Flex, Select, TextArea, Typography, useMediaQuery } from '@neo4j-ndl/react'; +import { + appLabels, + buttonCaptions, + defaultChunkOverlapOptions, + defaultTokenChunkSizeOptions, + defaultChunksToCombineOptions, + tooltips, +} from '../../../../utils/Constants'; import { tokens } from '@neo4j-ndl/base'; import ButtonWithToolTip from '../../../UI/ButtonWithToolTip'; import { useCallback } from 'react'; import { useFileContext } from '../../../../context/UsersFiles'; import { showNormalToast } from '../../../../utils/toasts'; +import { OnChangeValue } from 'react-select'; +import { OptionType } from '../../../../types'; export default function AdditionalInstructionsText({ closeEnhanceGraphSchemaDialog, @@ -13,52 +22,154 @@ export default function AdditionalInstructionsText({ }) { const { breakpoints } = tokens; const tablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); - const { additionalInstructions, setAdditionalInstructions } = useFileContext(); + const { + additionalInstructions, + setAdditionalInstructions, + setSelectedTokenChunkSize, + setSelectedChunk_overlap, + selectedTokenChunkSize, + selectedChunk_overlap, + selectedChunks_to_combine, + setSelectedChunks_to_combine, + } = useFileContext(); const clickAnalyzeInstructHandler = useCallback(async () => { localStorage.setItem('instructions', 
additionalInstructions); closeEnhanceGraphSchemaDialog(); showNormalToast(`Successfully Applied the Instructions`); }, [additionalInstructions]); + const onChangeChunk_size = (newValue: OnChangeValue) => { + if (newValue !== null) { + const parsedValue = Number(newValue.value); + if (isNaN(parsedValue)) { + showNormalToast('Chunk size must be a valid number'); + return; + } + setSelectedTokenChunkSize(parsedValue); + localStorage.setItem('selectedChunk_size', JSON.stringify({ selectedOption: parsedValue })); + } + }; + const onChangeChunk_overlap = (newValue: OnChangeValue) => { + if (newValue !== null) { + const parsedValue = Number(newValue.value); + if (isNaN(parsedValue)) { + showNormalToast('Chunk overlap must be a valid number'); + return; + } + setSelectedChunk_overlap(parsedValue); + localStorage.setItem('selectedChunk_overlap', JSON.stringify({ selectedOption: parsedValue })); + } + }; + const onChangeChunks_to_combine = (newValue: OnChangeValue) => { + if (newValue !== null) { + const parsedValue = Number(newValue.value); + if (isNaN(parsedValue)) { + showNormalToast('Chunks to combine must be a valid number'); + return; + } + setSelectedChunks_to_combine(parsedValue); + localStorage.setItem('selectedChunks_to_combine', JSON.stringify({ selectedOption: parsedValue })); + } + }; return ( - -
- - - - {buttonCaptions.provideAdditionalInstructions} - - - -
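The three onChange handlers added above repeat one pattern: take the react-select value, validate it as a number, push it into context state, and persist it under its own localStorage key wrapped in a { selectedOption } envelope. Under those assumptions, a small factory could express the pattern once; the helper below is a sketch, not code from this patch:

interface OptionType {
  label: string;
  value: string;
}

function makeNumericSelectHandler(
  storageKey: string,
  setValue: (n: number) => void,
  onInvalid: (message: string) => void
) {
  return (newValue: OptionType | null): void => {
    if (newValue === null) {
      return; // selection cleared: nothing to persist
    }
    const parsed = Number(newValue.value);
    if (Number.isNaN(parsed)) {
      onInvalid(`${storageKey} must be a valid number`);
      return;
    }
    setValue(parsed);
    // Persist in the same { selectedOption } envelope the component uses.
    localStorage.setItem(storageKey, JSON.stringify({ selectedOption: parsed }));
  };
}

// Usage sketch, reusing names from the patch:
// const onChangeChunk_size = makeNumericSelectHandler(
//   'selectedChunk_size',
//   setSelectedTokenChunkSize,
//   showNormalToast
// );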