forked from v1xerunt/DocTr
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenetic.py
More file actions
102 lines (81 loc) · 3.98 KB
/
genetic.py
File metadata and controls
102 lines (81 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import numpy as np
import random
import math
from utils import max_sim
random.seed(0)
def calculate_entropy(ratios):
    """Return the base-2 Shannon entropy of a sequence of proportions.

    Entries that are not strictly positive are skipped, so zero-probability
    categories contribute nothing to the total.
    """
    bits = 0.0
    for share in ratios:
        if not share > 0:
            continue
        bits -= share * math.log2(share)
    return bits
def calculate_fairness_score(locations):
    """Return the mean per-row entropy, normalised by the maximum entropy.

    Args:
        locations: Sequence of rows (annotated ``N * F`` in the original
            code); each row is passed to ``calculate_entropy`` as a list of
            proportions. The number of categories is taken from the length
            of the first row.

    Returns:
        The average row entropy divided by ``log2(len(locations[0]))``.
        Returns ``0.0`` when ``locations`` is empty or when the rows have
        fewer than two categories; in those cases the normaliser is
        undefined or zero and the score would otherwise raise.
    """
    num_locations = len(locations)
    if num_locations == 0:
        return 0.0

    num_categories = len(locations[0])
    # log2(1) == 0 and log2(0) is undefined, so there is nothing to
    # normalise by; treat a single-category feature as having no diversity.
    if num_categories < 2:
        return 0.0

    max_entropy = math.log2(num_categories)
    avg_entropy = sum(calculate_entropy(row) for row in locations) / num_locations
    return avg_entropy / max_entropy
def calculate_geo_entropy(selection):
    """Return the normalised Shannon entropy of the group labels in *selection*.

    Args:
        selection: Iterable of hashable group labels, one per selected item.

    Returns:
        The entropy of the label distribution divided by ``log2`` of the
        number of distinct labels, as a float in ``[0, 1]``; a perfectly even
        split scores ``1.0``. Returns ``0.0`` when there are fewer than two
        distinct labels (including an empty selection), where the normaliser
        would be zero or undefined.
    """
    # Count occurrences with a dict instead of list.index() per element.
    counts = {}
    for group in selection:
        counts[group] = counts.get(group, 0) + 1

    total_groups = len(counts)
    if total_groups < 2:
        return 0.0

    frequencies = np.fromiter(counts.values(), dtype=float, count=total_groups)
    probabilities = frequencies / frequencies.sum()
    # Every count is >= 1, so every probability is > 0 and log2 is finite.
    # Base 2 is used for both numerator and normaliser so the ratio is
    # a true normalised entropy.
    entropy = -np.sum(probabilities * np.log2(probabilities))
    max_entropy = math.log2(total_groups)
    return float(entropy / max_entropy)
def fitness(solution):
    """Score a candidate solution; the result is never negative.

    Each item in ``solution`` is indexed positionally: ``item[0]`` is summed
    as the base score, ``item[1]``, ``item[2]`` and ``item[3]`` are fed to
    ``calculate_fairness_score`` (named gender, race and ethnicity here),
    ``item[4]`` is fed to ``calculate_geo_entropy`` and ``item[5]`` is summed
    as a penalty term.

    Returns:
        ``base + 10 * (sum of the four entropy terms) - 0.1 * penalty`` when
        that value is greater than zero, otherwise ``0``.
    """
    base_score = sum(entry[0] for entry in solution)

    gender_term = calculate_fairness_score([entry[1] for entry in solution])
    race_term = calculate_fairness_score([entry[2] for entry in solution])
    ethnicity_term = calculate_fairness_score([entry[3] for entry in solution])
    geo_term = calculate_geo_entropy([entry[4] for entry in solution])

    competing_penalty = sum(entry[5] for entry in solution)

    # The weights (10 and 0.1) can be adjusted if needed.
    diversity = gender_term + race_term + ethnicity_term + geo_term
    combined = base_score + 10 * diversity - 0.1 * competing_penalty

    if combined > 0:
        return combined
    return 0
def create_initial_population(items, population_size, K):
    """Build the starting population of candidate solutions.

    Each of the ``population_size`` candidates is an independent
    ``random.sample`` of ``K`` entries from ``items``.
    """
    population = []
    for _ in range(population_size):
        candidate = random.sample(items, K)
        population.append(candidate)
    return population
def select_parents(population):
    """Pick two distinct parents by fitness-proportionate selection.

    Args:
        population: List of candidate solutions; needs at least two members.

    Returns:
        A two-element list of solutions drawn without replacement, each with
        probability proportional to its ``fitness``. When fewer than two
        candidates have a positive fitness, a weighted draw of two distinct
        parents is impossible (the weights cannot be normalised or leave only
        one eligible candidate), so the parents are drawn uniformly instead.

    Raises:
        ValueError: Raised by ``np.random.choice`` when the population holds
            fewer than two solutions.
    """
    fitness_scores = [fitness(sol) for sol in population]

    positive_count = sum(1 for score in fitness_scores if score > 0)
    if positive_count >= 2:
        total_fitness = sum(fitness_scores)
        selection_probs = [score / total_fitness for score in fitness_scores]
    else:
        # p=None makes np.random.choice sample uniformly.
        selection_probs = None

    # NOTE(review): this draws from NumPy's global RNG, which the visible
    # module-level random.seed(0) does not seed -- confirm whether runs are
    # expected to be reproducible.
    selected_indices = np.random.choice(
        len(population), size=2, replace=False, p=selection_probs
    )
    return [population[i] for i in selected_indices]
def crossover(parent1, parent2, K):
    """Single-point crossover of two parent solutions.

    Args:
        parent1: First parent, a list of items.
        parent2: Second parent, a list of items.
        K: Solution length; the cut point is drawn from ``1 .. K - 1``.

    Returns:
        A ``(child1, child2)`` tuple. ``child1`` takes the head of
        ``parent1`` and the tail of ``parent2``; ``child2`` is the mirror
        image. When ``K < 2`` there is no interior cut point, so shallow
        copies of the parents are returned unchanged.
    """
    if K < 2:
        # random.randint(1, 0) would raise; return copies so that later
        # in-place mutation of a child cannot alter a parent.
        return parent1[:], parent2[:]

    crossover_point = random.randint(1, K - 1)
    child1 = parent1[:crossover_point] + parent2[crossover_point:]
    child2 = parent2[:crossover_point] + parent1[crossover_point:]
    return child1, child2
def mutate(solution, items, mutation_rate):
    """Possibly overwrite one random slot of ``solution`` in place.

    A single ``random.random()`` draw is compared against ``mutation_rate``;
    when it is lower, one uniformly chosen position of ``solution`` is
    replaced by a ``random.choice`` from ``items``. The same list object is
    returned either way.
    """
    if not random.random() < mutation_rate:
        return solution

    slot = random.randint(0, len(solution) - 1)
    replacement = random.choice(items)
    solution[slot] = replacement
    return solution
def genetic_algorithm(items, K, population_size=50, generations=100, mutation_rate=0.01):
    """Evolve a population of K-item selections and return the fittest one.

    Args:
        items: Pool of candidate items to select from.
        K: Number of items in each solution.
        population_size: Number of solutions kept per generation; parent
            selection draws two distinct parents, so this should be >= 2.
        generations: Number of breeding rounds to run.
        mutation_rate: Per-child probability passed to ``mutate``.

    Returns:
        The solution in the final generation with the highest ``fitness``.
    """
    population = create_initial_population(items, population_size, K)

    # Each pairing yields two children. Round the number of pairings up and
    # trim afterwards so an odd population_size is preserved instead of
    # silently shrinking by one after the first generation.
    pairs_per_generation = (population_size + 1) // 2

    for _generation in range(generations):
        offspring = []
        for _pair in range(pairs_per_generation):
            parent1, parent2 = select_parents(population)
            child1, child2 = crossover(parent1, parent2, K)
            offspring.append(mutate(child1, items, mutation_rate))
            offspring.append(mutate(child2, items, mutation_rate))
        population = offspring[:population_size]

    return max(population, key=fitness)
def calc_metrics(best_solution, scores):
    """Summarise a solution as a tuple of six aggregate metrics.

    The last field of every item is used as an index into ``scores``; fields
    1-3 are passed to ``calculate_fairness_score``, field 4 to
    ``calculate_geo_entropy`` and field 5 is averaged.

    Returns:
        ``(mean score, gender entropy, race entropy, ethnicity entropy,
        geo entropy, mean competing score)``.
    """
    def column(position):
        # Pull one positional field out of every item in the solution.
        return [entry[position] for entry in best_solution]

    picked_scores = [scores[idx] for idx in column(-1)]

    return (
        np.mean(picked_scores),
        calculate_fairness_score(column(1)),
        calculate_fairness_score(column(2)),
        calculate_fairness_score(column(3)),
        calculate_geo_entropy(column(4)),
        np.mean(column(5)),
    )