# complex_algorithm.py

# An inefficient implementation of a data processing algorithm
# This code contains performance bottlenecks and inefficient data structures
# that can be optimized with GitHub Copilot's assistance
def process_large_dataset(data, threshold=0.5):
    """
    Process a large dataset with an inefficient algorithm.

    This function has several performance issues:
    - Redundant calculations
    - Inefficient data structures
    - Unnecessary copying of data
    - Poor algorithm choice

    Parameters:
    - data: List of dictionaries with 'id', 'value', and 'metadata' fields
    - threshold: Threshold value for filtering

    Returns:
    - Processed and filtered data
    """
    # Initialize results
    results = []

    # Pre-process: extract values for later use
    all_values = []
    for item in data:
        all_values.append(item['value'])

    # Calculate statistics - inefficiently
    total = 0
    for val in all_values:
        total += val
    mean = total / len(all_values) if len(all_values) > 0 else 0

    # Calculate variance - inefficiently
    variance = 0
    for val in all_values:
        variance += (val - mean) ** 2
    variance = variance / len(all_values) if len(all_values) > 0 else 0

    # Process each item - with redundant calculations
    for item in data:
        # Normalize value - redundantly calculated for each item
        normalized_value = (item['value'] - mean) / (variance ** 0.5) if variance > 0 else 0

        # Filter based on threshold
        if normalized_value > threshold:
            # Copy the item field by field (a shallow copy) so the original
            # data is not modified
            processed_item = {}
            for key in item:
                processed_item[key] = item[key]

            # Add derived fields
            processed_item['normalized_value'] = normalized_value
            processed_item['is_significant'] = normalized_value > 2 * threshold

            # Add to results
            results.append(processed_item)
    # Sort results in descending order of normalized value - inefficiently (O(n^2))
    for i in range(len(results)):
        for j in range(i + 1, len(results)):
            if results[i]['normalized_value'] < results[j]['normalized_value']:
                results[i], results[j] = results[j], results[i]

    # Calculate additional metrics for filtered items - redundant loops
    metadata_counts = {}
    for item in results:
        meta = item['metadata']
        if meta in metadata_counts:
            metadata_counts[meta] = metadata_counts[meta] + 1
        else:
            metadata_counts[meta] = 1

    # Add frequency information to results - another loop through results
    for item in results:
        item['frequency'] = metadata_counts[item['metadata']] / len(results) if len(results) > 0 else 0

    return results
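

# The inefficiencies above are deliberate; for contrast, here is a minimal
# sketch of what an optimized rewrite might look like. It assumes the same
# input shape ('id', 'value', 'metadata') and produces the same output fields.
# The name process_large_dataset_optimized is illustrative only and is not
# part of the original exercise.
def process_large_dataset_optimized(data, threshold=0.5):
    """
    Sketch of an optimized variant of process_large_dataset:
    - computes mean/variance with single-pass built-ins
    - copies items with dict() instead of a manual key loop
    - sorts once with list.sort() in O(n log n)
    - counts metadata with collections.Counter
    """
    from collections import Counter

    values = [item['value'] for item in data]
    n = len(values)
    mean = sum(values) / n if n else 0
    variance = sum((v - mean) ** 2 for v in values) / n if n else 0
    std = variance ** 0.5

    results = []
    for item in data:
        normalized = (item['value'] - mean) / std if std > 0 else 0
        if normalized > threshold:
            processed = dict(item)  # shallow copy, same effect as the key loop above
            processed['normalized_value'] = normalized
            processed['is_significant'] = normalized > 2 * threshold
            results.append(processed)

    # One O(n log n) sort, descending by normalized value
    results.sort(key=lambda r: r['normalized_value'], reverse=True)

    # Count metadata occurrences in a single pass
    counts = Counter(r['metadata'] for r in results)
    total = len(results)
    for r in results:
        r['frequency'] = counts[r['metadata']] / total if total else 0

    return results
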
def generate_test_data(size=1000):
    """
    Generate test data for demonstration.
    """
    import random

    data = []
    for i in range(size):
        data.append({
            'id': i,
            'value': random.random() * 100,
            'metadata': random.choice(['A', 'B', 'C', 'D', 'E'])
        })
    return data

# Example usage
if __name__ == "__main__":
    # Generate sample data
    test_data = generate_test_data(size=5000)

    # Time the processing
    import time
    start_time = time.time()

    # Process data
    result = process_large_dataset(test_data)

    # Print execution time
    print(f"Processed {len(test_data)} items in {time.time() - start_time:.4f} seconds")
    print(f"Result contains {len(result)} items")

    # Print first few results
    for i, item in enumerate(result[:5]):
        print(f"{i+1}. ID: {item['id']}, Value: {item['value']:.2f}, Normalized: {item['normalized_value']:.2f}")