@@ -230,7 +230,7 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type
 async def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions):
 
     logging.info(f'Process file name :{fileName}')
-    if not retry_condition:
+    if retry_condition in ["", None] or retry_condition not in [DELETE_ENTITIES_AND_START_FROM_BEGINNING, START_FROM_LAST_PROCESSED_POSITION]:
         gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
         if gcs_file_cache == 'True':
             folder_name = create_gcs_bucket_folder_name_hashed(uri, fileName)
@@ -244,7 +244,7 @@ async def extract_graph_from_file_local_file(uri, userName, password, database,
     return await processing_source(uri, userName, password, database, model, fileName, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, True, merged_file_path, retry_condition, additional_instructions=additional_instructions)
 
 async def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions):
-    if not retry_condition:
+    if retry_condition in ["", None] or retry_condition not in [DELETE_ENTITIES_AND_START_FROM_BEGINNING, START_FROM_LAST_PROCESSED_POSITION]:
         if (aws_access_key_id == None or aws_secret_access_key == None):
             raise LLMGraphBuilderException('Please provide AWS access and secret keys')
         else:
@@ -258,7 +258,7 @@ async def extract_graph_from_file_s3(uri, userName, password, database, model, s
     return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions)
 
 async def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions):
-    if not retry_condition:
+    if retry_condition in ["", None] or retry_condition not in [DELETE_ENTITIES_AND_START_FROM_BEGINNING, START_FROM_LAST_PROCESSED_POSITION]:
         pages = get_documents_from_web_page(source_url)
         if pages == None or len(pages) == 0:
             raise LLMGraphBuilderException(f'Content is not available for given URL : {file_name}')
@@ -267,7 +267,7 @@ async def extract_graph_from_web_page(uri, userName, password, database, model,
     return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions)
 
 async def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions):
-    if not retry_condition:
+    if retry_condition in ["", None] or retry_condition not in [DELETE_ENTITIES_AND_START_FROM_BEGINNING, START_FROM_LAST_PROCESSED_POSITION]:
         file_name, pages = get_documents_from_youtube(source_url)
 
         if pages == None or len(pages) == 0:
@@ -277,7 +277,7 @@ async def extract_graph_from_file_youtube(uri, userName, password, database, mod
     return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions)
 
 async def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions):
-    if not retry_condition:
+    if retry_condition in ["", None] or retry_condition not in [DELETE_ENTITIES_AND_START_FROM_BEGINNING, START_FROM_LAST_PROCESSED_POSITION]:
         file_name, pages = get_documents_from_Wikipedia(wiki_query, language)
         if pages == None or len(pages) == 0:
             raise LLMGraphBuilderException(f'Wikipedia page is not available for file : {file_name}')
@@ -286,7 +286,7 @@ async def extract_graph_from_file_Wikipedia(uri, userName, password, database, m
     return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition=retry_condition, additional_instructions=additional_instructions)
 
 async def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions):
-    if not retry_condition:
+    if retry_condition in ["", None] or retry_condition not in [DELETE_ENTITIES_AND_START_FROM_BEGINNING, START_FROM_LAST_PROCESSED_POSITION]:
         file_name, pages = get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token)
         if pages == None or len(pages) == 0:
             raise LLMGraphBuilderException(f'File content is not available for file : {file_name}')
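
Reviewer note: the same guard is now repeated in all six `extract_graph_from_file_*` entry points above. The first clause is actually subsumed by the second (`""` and `None` are never one of the two retry constants), so the whole test reduces to a single membership check. A minimal sketch of factoring it into one helper; the helper name is hypothetical and the constant string values are assumed for illustration (in the repo they come from the shared constants module):

```python
# Hypothetical helper, not part of this PR. Constant values are assumed.
DELETE_ENTITIES_AND_START_FROM_BEGINNING = "delete_entities_and_start_from_beginning"
START_FROM_LAST_PROCESSED_POSITION = "start_from_last_processed_position"

def needs_fresh_processing(retry_condition):
    """True when the source must be fetched and chunked from scratch:
    retry_condition is empty, None, or not a recognized resume mode."""
    return retry_condition not in [DELETE_ENTITIES_AND_START_FROM_BEGINNING,
                                   START_FROM_LAST_PROCESSED_POSITION]
```

Each call site would then read `if needs_fresh_processing(retry_condition):`, which keeps the six functions in sync if another retry mode is ever added.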
@@ -431,7 +431,7 @@ async def processing_source(uri, userName, password, database, model, file_name,
 
     # merged_file_path has a value only when the file was uploaded from local storage
 
-    if is_uploaded_from_local:
+    if is_uploaded_from_local and bool(is_cancelled_status) == False:
         gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
         if gcs_file_cache == 'True':
             folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
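
Reviewer note: with this gate, the merged local file (and its GCS cached copy) survives a cancelled run, so the file can be reprocessed later without re-uploading. The resulting cleanup flow reads roughly as below; the names come from this diff, and `delete_uploaded_local_file` mirrors the block removed from `manually_cancelled_job` further down, so treat that exact call as an assumption:

```python
# Roughly the resulting flow in processing_source (names from this diff).
# `not is_cancelled_status` is the idiomatic spelling of
# `bool(is_cancelled_status) == False`.
if is_uploaded_from_local and not is_cancelled_status:
    gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
    if gcs_file_cache == 'True':
        # Cached in GCS: remove the cached object.
        folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
        delete_file_from_gcs(BUCKET_UPLOAD, folder_name, file_name)
    else:
        # Stored locally: remove the merged file on disk (assumed call).
        delete_uploaded_local_file(merged_file_path, file_name)
```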
@@ -511,7 +511,7 @@ async def processing_chunks(chunkId_chunkDoc_list, graph, uri, userName, password,
     return node_count, rel_count, latency_processing_chunk
 
 def get_chunkId_chunkDoc_list(graph, file_name, pages, token_chunk_size, chunk_overlap, retry_condition):
-    if not retry_condition:
+    if retry_condition in ["", None] or retry_condition not in [DELETE_ENTITIES_AND_START_FROM_BEGINNING, START_FROM_LAST_PROCESSED_POSITION]:
         logging.info("Break down file into chunks")
         bad_chars = ['"', "\n", "'"]
         for i in range(0, len(pages)):
@@ -532,7 +532,7 @@ def get_chunkId_chunkDoc_list(graph, file_name, pages, token_chunk_size, chunk_o
         chunks = execute_graph_query(graph, QUERY_TO_GET_CHUNKS, params={"filename": file_name})
 
         if chunks[0]['text'] is None or chunks[0]['text'] == "" or not chunks:
-            raise LLMGraphBuilderException(f"Chunks are not created for {file_name}. Please re-upload file and try again.")
+            raise LLMGraphBuilderException(f"Chunks are not created for {file_name}. Please re-upload file or reprocess the file with option Start From Beginning.")
         else:
             for chunk in chunks:
                 chunk_doc = Document(page_content=chunk['text'], metadata={'id': chunk['id'], 'position': chunk['position']})
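
Reviewer note: one pre-existing issue this hunk leaves as-is: the guard evaluates `chunks[0]['text']` before testing `not chunks`, so an empty result from QUERY_TO_GET_CHUNKS raises IndexError instead of the intended LLMGraphBuilderException. A sketch of a safer ordering under the same names:

```python
# Test for emptiness first so chunks[0] is never read from an empty list.
if not chunks or chunks[0]['text'] in (None, ""):
    raise LLMGraphBuilderException(
        f"Chunks are not created for {file_name}. "
        "Please re-upload file or reprocess the file with option Start From Beginning."
    )
```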
@@ -714,15 +714,9 @@ def manually_cancelled_job(graph, filenames, source_types, merged_dir, uri):
         obj_source_node.updated_at = datetime.now()
         graphDb_data_Access = graphDBdataAccess(graph)
         graphDb_data_Access.update_source_node(obj_source_node)
-        count_response = graphDb_data_Access.update_node_relationship_count(file_name)
+        # Update the nodeCount and relCount properties on the Document node
+        graphDb_data_Access.update_node_relationship_count(file_name)
         obj_source_node = None
-        merged_file_path = os.path.join(merged_dir, file_name)
-        if source_type == 'local file' and gcs_file_cache == 'True':
-            folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
-            delete_file_from_gcs(BUCKET_UPLOAD, folder_name, file_name)
-        else:
-            logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}')
-            delete_uploaded_local_file(merged_file_path, file_name)
     return "Cancelled the processing job successfully"
 
 def populate_graph_schema_from_text(text, model, is_schema_description_checked, is_local_storage):
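
Reviewer note: with the deletion block gone, cancelling a job only flips the Document node's status and refreshes its counts; removal of the merged source file is deferred until a run actually completes (see the `is_cancelled_status` gate in `processing_source` above). The per-file body now reduces to roughly the following sketch; `sourceNode` and the `'Cancelled'` status literal are assumptions based on how this module is used elsewhere:

```python
# Approximate shape of the loop body after this change (names from the diff;
# sourceNode and the 'Cancelled' status literal are assumptions).
for file_name in filenames:
    obj_source_node = sourceNode()
    obj_source_node.file_name = file_name
    obj_source_node.is_cancelled = True
    obj_source_node.status = 'Cancelled'
    obj_source_node.updated_at = datetime.now()
    graphDb_data_Access = graphDBdataAccess(graph)
    graphDb_data_Access.update_source_node(obj_source_node)
    # Refresh nodeCount/relCount; the return value is no longer kept.
    graphDb_data_Access.update_node_relationship_count(file_name)
```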
@@ -749,10 +743,19 @@ def set_status_retry(graph, file_name, retry_condition):
     obj_source_node.is_cancelled = False
     if retry_condition == DELETE_ENTITIES_AND_START_FROM_BEGINNING or retry_condition == START_FROM_BEGINNING:
         obj_source_node.processed_chunk = 0
-        if retry_condition == DELETE_ENTITIES_AND_START_FROM_BEGINNING:
-            execute_graph_query(graph, QUERY_TO_DELETE_EXISTING_ENTITIES, params={"filename": file_name})
         obj_source_node.node_count = 0
         obj_source_node.relationship_count = 0
+        obj_source_node.chunkNodeCount = 0
+        obj_source_node.chunkRelCount = 0
+        obj_source_node.communityNodeCount = 0
+        obj_source_node.communityRelCount = 0
+        obj_source_node.entityEntityRelCount = 0
+        obj_source_node.entityNodeCount = 0
+        obj_source_node.processingTime = 0
+        obj_source_node.total_chunks = 0
+        if retry_condition == DELETE_ENTITIES_AND_START_FROM_BEGINNING:
+            execute_graph_query(graph, QUERY_TO_DELETE_EXISTING_ENTITIES, params={"filename": file_name})
+
     logging.info(obj_source_node)
     graphDb_data_Access.update_source_node(obj_source_node)