From c737a6e367c2823713f2fc2a95c2d55f96fa85f3 Mon Sep 17 00:00:00 2001 From: QuantChallenger <158506283+QuantChallenger@users.noreply.github.com> Date: Wed, 17 Apr 2024 09:44:21 +0200 Subject: [PATCH 1/2] Create Financial Data Fusion and Processing Script --- Financial Data Fusion and Processing Script | 63 +++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 Financial Data Fusion and Processing Script diff --git a/Financial Data Fusion and Processing Script b/Financial Data Fusion and Processing Script new file mode 100644 index 0000000..f41b47f --- /dev/null +++ b/Financial Data Fusion and Processing Script @@ -0,0 +1,63 @@ +#Daily version +import os +import pandas as pd + +source = r'C:\Users\reyna\Desktop\joindaily' + +output_file = r'C:\Users\reyna\Desktop\fused documents\FuseddailyVar.xlsx' +test_file = r'C:\Users\reyna\Desktop\fused documents\test.xlsx' + +# Initialize an empty list to store DataFrames +dfs = [] + +# Iterate through each file in the directory +for filename in os.listdir(source): + filepath = os.path.join(source, filename) + if os.path.isfile(filepath) and filename.endswith('.csv'): + # Read the CSV file + data = pd.read_csv(filepath) + # Select only the required columns + selected_data = data[['Date']] + # Append selected data to the list of DataFrames + dfs.append(selected_data) + +concatenated_data = pd.concat(dfs, axis= 0) + + +date_counts = concatenated_data['Date'].value_counts() + +correct_dates = date_counts[date_counts >= 30] +correct_dates = correct_dates.sort_index(ascending = True) +correct_dates = correct_dates.to_dict() +key_data = list(correct_dates.keys()) +list_key = pd.DataFrame(key_data) +list_key = list_key.rename(columns = { 0 :"date"}) + + + +df2 = [] +# Iterate through each file in the directory +for filename in os.listdir(source): + filepath = os.path.join(source, filename) + if os.path.isfile(filepath) and filename.endswith('.csv'): + # Read the CSV file + data = pd.read_csv(filepath) + # Select 
only the required columns + selected_data = data[['Date','Close','Volume']] + selected_data = selected_data.rename(columns={'Date': 'Date'}) + selected_data = selected_data.rename(columns={'Close': 'Close' + filename[:-4]}) + selected_data = selected_data.rename(columns={'Volume': 'Volume' + filename[:-4]}) + # Append selected data to the list of DataFrames + df2.append(selected_data) + +#concate_data = pd.concat(df2, axis= 1) +merged_dfs=[] + +for df in df2: + merged_df = list_key.merge(df, how= "inner", left_on ='date', right_on = 'Date') + merged_dfs.append(merged_df) + + +final_merge = pd.concat(merged_dfs, axis= 1) +final_merge.to_excel(output_file, index=True, header=True) +print(final_merge) From dc474009121c8c43fdca1becdc2dade57c91237e Mon Sep 17 00:00:00 2001 From: QuantChallenger <158506283+QuantChallenger@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:24:23 +0200 Subject: [PATCH 2/2] Update Financial Data Fusion and Processing Script --- Financial Data Fusion and Processing Script | 34 ++++++++++++++++++--- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/Financial Data Fusion and Processing Script b/Financial Data Fusion and Processing Script index f41b47f..999ffee 100644 --- a/Financial Data Fusion and Processing Script +++ b/Financial Data Fusion and Processing Script @@ -1,12 +1,18 @@ -#Daily version +#New idea: pick up all the dates, find the most recurrent ones across the 30 stocks, sort them, and create a list. +#Then create a dict out of each csv, keyed by date, with [close, volume] as the values. 
Then pick up only the dates +# from your original list in a loop, store them in a list of DataFrames, then concatenate them and write the result to Excel + import os import pandas as pd -source = r'C:\Users\reyna\Desktop\joindaily' +source = r'C:\Users\reyna\Desktop\VAR TEST MONTHLY' #VAR TEST DAILY or VAR TEST WEEKLY or VAR TEST MONTHLY -output_file = r'C:\Users\reyna\Desktop\fused documents\FuseddailyVar.xlsx' +output_file = r'C:\Users\reyna\Desktop\fused documents\FusedmonthlyVar.xlsx' #FuseddailyVar or FusedweeklyVar or FusedmonthlyVar test_file = r'C:\Users\reyna\Desktop\fused documents\test.xlsx' +""" +#Daily only + # Initialize an empty list to store DataFrames dfs = [] @@ -26,7 +32,11 @@ concatenated_data = pd.concat(dfs, axis= 0) date_counts = concatenated_data['Date'].value_counts() -correct_dates = date_counts[date_counts >= 30] + + + + +correct_dates = date_counts[date_counts >= 30] #no need for this part for weekly and monthly correct_dates = correct_dates.sort_index(ascending = True) correct_dates = correct_dates.to_dict() key_data = list(correct_dates.keys()) @@ -35,6 +45,9 @@ list_key = list_key.rename(columns = { 0 :"date"}) +#list_key.to_excel(test_file, index=True, header=True) +""" + df2 = [] # Iterate through each file in the directory for filename in os.listdir(source): @@ -50,7 +63,17 @@ for filename in os.listdir(source): # Append selected data to the list of DataFrames df2.append(selected_data) -#concate_data = pd.concat(df2, axis= 1) +#This part is for monthly and weekly only +concate_data = pd.concat(df2, axis= 1) +correct_dates = concate_data.sort_index(ascending = True) + +concate_data.to_excel(output_file, index=True, header=True) + + +''' +#This part of the code is for daily only + + merged_dfs=[] for df in df2: @@ -61,3 +84,4 @@ for df in df2: final_merge = pd.concat(merged_dfs, axis= 1) final_merge.to_excel(output_file, index=True, header=True) print(final_merge) +'''