-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfuture_engineering.py
More file actions
72 lines (60 loc) · 1.96 KB
/
future_engineering.py
File metadata and controls
72 lines (60 loc) · 1.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import numpy as np
import pandas as pd
# Example mock data (replace with actual data).
#
# NOTE: each `customers` row mixes ints and strings, so NumPy coerces every
# cell of that array to a string; its numeric columns are cast back with
# .astype() / int() wherever they are used below.
# Columns as used below: 0 -> customer id, 1 -> age, 2 -> city,
# 3 -> account type.
customers = np.array([
    [1, 28, 'CityA', 'Savings'],
    [2, 35, 'CityB', 'Current'],
    [3, 42, 'CityC', 'Savings'],
    [4, 30, 'CityD', 'Current'],
    [5, 50, 'CityE', 'Savings'],
])
# Columns as used below: 2 -> balance, 3 -> risk score.
# NOTE(review): columns 0 and 1 look like account id / customer id, but the
# script never reads them -- confirm against the real data source.
accounts = np.array([
    [1, 1, 15000, 0.5],
    [2, 2, 50000, 0.8],
    [3, 3, 20000, 0.3],
    [4, 4, 70000, 0.9],
    [5, 5, 12000, 0.4],
])
# Columns as used below: 1 -> customer id, 2 -> transaction amount.
transactions = np.array([
    [1, 1, 700],
    [2, 1, 350],
    [3, 2, 1000],
    [4, 2, 1000],
    [5, 3, 150],
    [6, 3, 150],
    [7, 4, 2000],
    [8, 4, 2000],
])

# --- 1. Risk label ---
# A row is labelled high risk (1) when its risk score is strictly above the
# threshold, otherwise 0.
HIGH_RISK_THRESHOLD = 0.7
risk_score = accounts[:, 3].astype(float)
high_risk = np.where(risk_score > HIGH_RISK_THRESHOLD, 1, 0)

# --- 2. Transaction aggregates ---
customer_ids = customers[:, 0]
# Cast the transaction columns once instead of on every loop iteration.
tx_customer_ids = transactions[:, 1].astype(int)
tx_amounts = transactions[:, 2].astype(float)
total_transaction, avg_transaction, transaction_count = [], [], []
for cid in customer_ids:
    cust_tx = tx_amounts[tx_customer_ids == int(cid)]
    total_transaction.append(np.sum(cust_tx))
    # A customer without transactions gets an average of 0 rather than the
    # NaN (plus RuntimeWarning) that np.mean returns for an empty array.
    avg_transaction.append(np.mean(cust_tx) if cust_tx.size > 0 else 0.0)
    transaction_count.append(int(cust_tx.size))

# --- 3. Encode categorical variables ---
# Categories are sorted before numbering: iterating a bare set() of strings
# follows hash order, which changes between interpreter runs, so the codes
# would not be reproducible.
city_values = customers[:, 2].tolist()
cities = sorted(set(city_values))
city_codes = {name: code for code, name in enumerate(cities)}
city_encoded = np.array([city_codes[name] for name in city_values])

account_type_values = customers[:, 3].tolist()
account_types = sorted(set(account_type_values))
account_type_codes = {name: code for code, name in enumerate(account_types)}
account_type_encoded = np.array(
    [account_type_codes[name] for name in account_type_values]
)

# --- 4. Merge all features into DataFrame ---
# NOTE(review): customer-level and account-level columns are combined purely
# by row position (row i of `customers` with row i of `accounts`); there is
# no join on an id. This presumably relies on both tables listing the same
# customers in the same order -- verify before swapping in real data.
balances = accounts[:, 2].astype(float)
# Feature columns, i.e. every column of the final dataset except 'target'.
training_columns = [
    'age', 'city_encoded', 'account_type_encoded', 'balance',
    'total_transaction', 'avg_transaction', 'transaction_count'
]
final_dataset = pd.DataFrame({
    'age': customers[:, 1].astype(float),
    'city_encoded': city_encoded,
    'account_type_encoded': account_type_encoded,
    'balance': balances,
    'total_transaction': total_transaction,
    'avg_transaction': avg_transaction,
    'transaction_count': transaction_count,
    'target': high_risk,
})
# Select via training_columns so the declared feature list and the actual
# frame cannot drift apart silently (a missing name raises KeyError).
final_dataset = final_dataset[training_columns + ['target']]
print("Final dataset shape:", final_dataset.shape)