-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathspread_analysis.py
More file actions
65 lines (47 loc) · 2.96 KB
/
spread_analysis.py
File metadata and controls
65 lines (47 loc) · 2.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import pandas as pd
import numpy as np
import os
# Aggregate bid/ask spread statistics for every quote CSV in `csv_directory`
# and write one summary row per input file to a report CSV.

# Directory holding the input CSVs. The path is relative, so it is resolved
# against the current working directory (not against this file's location).
csv_directory = './datasets'

# Name of the report written into `csv_directory`. Because it lives next to
# the inputs and also ends in ".csv", it must be excluded when scanning.
OUTPUT_FILENAME = 'aggregated_results_with_volumes_and_std_dev.csv'

# Percentiles of the basis-point spread reported for each input file.
PERCENTILES = (1, 10, 25, 50, 75, 90, 99)

# Decimal places kept for every basis-point statistic.
BASIS_POINT_DECIMALS = 4


def _parse_filename(filename):
    """Return ``(exchange, date)`` parsed from a CSV filename, or ``None``.

    The name is split on ``_``; the first part is the exchange and the third
    part, up to its first ``.``, is the date. ``None`` is returned when the
    name has fewer than three ``_``-separated parts.
    """
    parts = filename.split('_')
    if len(parts) < 3:
        return None
    return parts[0], parts[2].split('.')[0]


def summarize_quotes(df, percentiles=PERCENTILES):
    """Compute the spread statistics for one non-empty quote DataFrame.

    Args:
        df: DataFrame with the columns ``symbol``, ``ask_price``,
            ``bid_price``, ``ask_amount`` and ``bid_amount``. It is not
            modified.
        percentiles: Percentile ranks (0-100) of the basis-point spread to
            report.

    Returns:
        A tuple ``(symbol, average_bid_volume, average_ask_volume,
        basis_point_std_dev, *percentile_values)`` where ``symbol`` is taken
        from the first row and the basis-point statistics are rounded to
        ``BASIS_POINT_DECIMALS`` places.
    """
    spread = df['ask_price'] - df['bid_price']
    mid_price = (df['ask_price'] + df['bid_price']) / 2
    # Spread relative to the mid-price, scaled by 10000 (basis points).
    basis_point = (spread / mid_price * 10000).round(BASIS_POINT_DECIMALS)

    # Notional volume on each side of the book: amount * price.
    average_ask_volume = (df['ask_amount'] * df['ask_price']).mean()
    average_bid_volume = (df['bid_amount'] * df['bid_price']).mean()

    # np.round works whether std() returns a NumPy scalar or a Python float.
    basis_point_std_dev = np.round(basis_point.std(), BASIS_POINT_DECIMALS)

    # NOTE: np.percentile returns NaN if any basis-point value is NaN, while
    # Series.std() above skips NaN by default.
    basis_point_percentiles = np.percentile(basis_point, percentiles).round(
        BASIS_POINT_DECIMALS
    )

    # Assumes the 'symbol' column is uniform across the file.
    symbol = df['symbol'].iloc[0]

    return (
        symbol,
        average_bid_volume,
        average_ask_volume,
        basis_point_std_dev,
    ) + tuple(basis_point_percentiles)


def analyze_directory(directory, percentiles=PERCENTILES):
    """Summarize every quote CSV in ``directory`` into one DataFrame.

    Files that do not end in ``.csv`` and the report file itself
    (``OUTPUT_FILENAME``) are ignored. Files whose name cannot be parsed, or
    that contain no data rows, are skipped with a printed message instead of
    aborting the whole run.

    Args:
        directory: Path of the directory to scan.
        percentiles: Percentile ranks forwarded to ``summarize_quotes``.

    Returns:
        A DataFrame with one row per processed file, sorted by exchange and
        then date.
    """
    rows = []
    # Sorted listing keeps the output independent of filesystem order.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.csv'):
            continue
        # A report left by a previous run is not an input file.
        if filename == OUTPUT_FILENAME:
            continue

        parsed = _parse_filename(filename)
        if parsed is None:
            print(f'Skipping {filename}: expected <exchange>_<...>_<date>.csv')
            continue
        exchange_name, date = parsed

        df = pd.read_csv(os.path.join(directory, filename))
        if df.empty:
            print(f'Skipping {filename}: no data rows')
            continue

        rows.append((exchange_name, date) + summarize_quotes(df, percentiles))

    columns = [
        'Exchange',
        'Date',
        'Symbol',
        'Average Bid Volume',
        'Average Ask Volume',
        'Std Dev of Basis Points',
    ] + [f'{p}th P' for p in percentiles]

    results_df = pd.DataFrame(rows, columns=columns)
    return results_df.sort_values(by=['Exchange', 'Date'])


def main():
    """Analyze ``csv_directory`` and store the report next to the inputs."""
    results_df = analyze_directory(csv_directory)
    output_filepath = os.path.join(csv_directory, OUTPUT_FILENAME)
    results_df.to_csv(output_filepath, index=False)


if __name__ == '__main__':
    main()