-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdates_loop_dict_pickOGlist.py
More file actions
98 lines (65 loc) · 2.9 KB
/
dates_loop_dict_pickOGlist.py
File metadata and controls
98 lines (65 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 16 13:16:17 2024
@author: reyna
"""
#New idea pick up all the dates , find the most recurrent out of the 30 stocks, sort them datas, create a list.
#Then create a dict out of each csv based on date as main key [close, volume]. Then only pick up the dates
# from your orignal list in a loop and store them in a list of df, then concat them and send to excel
import os
import pandas as pd
source = r'C:\Users\reyna\Desktop\VAR TEST MONTHLY' #VAR TEST DAILY or VAR TEST WEEKLY or VAR TEST MONTHLY
output_file = r'C:\Users\reyna\Desktop\fused documents\FusedmonthlyVar.xlsx' #FuseddailyVar or FusedweeklyVar or FusedmonthlyVar
test_file = r'C:\Users\reyna\Desktop\fused documents\test.xlsx'
"""
#Daily only
# Initialize an empty list to store DataFrames
dfs = []
# Iterate through each file in the directory
for filename in os.listdir(source):
filepath = os.path.join(source, filename)
if os.path.isfile(filepath) and filename.endswith('.csv'):
# Read the CSV file
data = pd.read_csv(filepath)
# Select only the required columns
selected_data = data[['Date']]
# Append selected data to the list of DataFrames
dfs.append(selected_data)
concatenated_data = pd.concat(dfs, axis= 0)
date_counts = concatenated_data['Date'].value_counts()
correct_dates = date_counts[date_counts >= 30] #no need for this part for weekly and monthly
correct_dates = correct_dates.sort_index(ascending = True)
correct_dates = correct_dates.to_dict()
key_data = list(correct_dates.keys())
list_key = pd.DataFrame(key_data)
list_key = list_key.rename(columns = { 0 :"date"})
#list_key.to_excel(test_file, index=True, header=True)
"""
df2 = []
# Iterate through each file in the directory
for filename in os.listdir(source):
filepath = os.path.join(source, filename)
if os.path.isfile(filepath) and filename.endswith('.csv'):
# Read the CSV file
data = pd.read_csv(filepath)
# Select only the required columns
selected_data = data[['Date','Close','Volume']]
selected_data = selected_data.rename(columns={'Date': 'Date'})
selected_data = selected_data.rename(columns={'Close': 'Close' + filename[:-4]})
selected_data = selected_data.rename(columns={'Volume': 'Volume' + filename[:-4]})
# Append selected data to the list of DataFrames
df2.append(selected_data)
#This part is for monthly and weekly only
concate_data = pd.concat(df2, axis= 1)
correct_dates = concate_data.sort_index(ascending = True)
concate_data.to_excel(output_file, index=True, header=True)
'''
#This part of the codecode is for daily only
merged_dfs=[]
for df in df2:
merged_df = list_key.merge(df, how= "inner", left_on ='date', right_on = 'Date')
merged_dfs.append(merged_df)
final_merge = pd.concat(merged_dfs, axis= 1)
final_merge.to_excel(output_file, index=True, header=True)
print(final_merge)
'''