Skip to content

Commit f48092a

Browse files
Fix genetic miner: correct I/O matrix orientation, crossover swap-point/repair logic, and Petri net construction
1 parent f2d19a6 commit f48092a

11 files changed

Lines changed: 246 additions & 85 deletions

File tree

examples/genetic_miner.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ def execute_script():
99
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"))
1010
net, im, fm = pm4py.discover_petri_net_genetic(log, population_size = 20, generations = 30)
1111

12+
fitness_tbr = pm4py.fitness_token_based_replay(log, net, im, fm)
13+
print("fitness_tbr", fitness_tbr)
14+
precision_tbr = pm4py.precision_token_based_replay(log, net, im, fm)
15+
print("precision_tbr", precision_tbr)
16+
1217
if importlib.util.find_spec("graphviz"):
1318
pm4py.view_petri_net(net, im, fm, format=examples_conf.TARGET_IMG_FORMAT)
1419

pm4py/algo/discovery/genetic/algorithm.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
'''
2222
from enum import Enum
2323
from pm4py.util import exec_utils
24-
from pm4py.algo.discovery.genetic.variants import classic
2524
from typing import Union, Optional, Dict, Any, Tuple
2625
from pm4py.objects.petri_net.obj import PetriNet, Marking
2726
from pm4py.objects.log.obj import EventLog, EventStream
@@ -43,6 +42,9 @@ class Parameters(Enum):
4342
LOG_CSV = "log_csv"
4443

4544

45+
from pm4py.algo.discovery.genetic.variants import classic
46+
47+
4648
class Variants(Enum):
4749
CLASSIC = classic
4850

pm4py/algo/discovery/genetic/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def __repr__(self):
3737
return "{" + repr(sorted(self))[1:-1] + "}"
3838

3939
@staticmethod
40-
def flat(item: Iterable) -> Self:
40+
def flat(item: Iterable) -> "iset":
4141
return iset(itertools.chain(*item))
4242

4343
def rand_partition(pool: Iterable) -> list[set]:

pm4py/algo/discovery/genetic/variants/classic.py

Lines changed: 130 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import copy
2727
import csv
2828
from datetime import datetime
29-
from func_timeout import func_timeout, FunctionTimedOut
29+
from pm4py.util.timeout import func_timeout, FunctionTimedOut
3030
from tqdm import tqdm
3131
import numpy
3232
import random
@@ -36,21 +36,22 @@
3636
from pm4py.util import exec_utils, constants
3737
from pm4py.util import xes_constants as xes
3838
from pm4py.algo.discovery.genetic.algorithm import Parameters
39+
from pm4py.objects.conversion.log import converter as log_converter
3940
from pm4py.objects.conversion.genetic_matrix.variants.to_petri_net import apply as matrix2petrinet
4041
from pm4py.algo.discovery.genetic.util import get_src_sink_sets_for_wfnet, iset, rand_partition
4142
from pm4py.objects.genetic_matrix.obj import GeneticMatrix
4243

4344
# typing
44-
import typing
45-
from typing import Union, TextIO, Self
45+
from typing import Any, Dict, Optional, TextIO, Tuple, Union
4646
from pandas.core.frame import DataFrame
47-
from pm4py.objects.log.obj import EventLog
47+
from pm4py.objects.log.obj import EventLog, EventStream
4848
from pm4py.objects.petri_net.obj import PetriNet, Marking
4949
from pm4py.algo.discovery.genetic.util import InputMap, OutputMap, Individual
50+
from pm4py.utils import is_polars_lazyframe
5051

5152

5253
def apply(
53-
log: EventLog,
54+
log: Union[EventLog, EventStream, DataFrame],
5455
parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
5556
) -> Tuple[PetriNet, Marking, Marking]:
5657
"""
@@ -89,6 +90,14 @@ def apply(
8990
"""
9091
if parameters is None:
9192
parameters = {}
93+
if is_polars_lazyframe(log):
94+
log = log.collect().to_pandas()
95+
if not isinstance(log, DataFrame):
96+
log = log_converter.apply(
97+
log,
98+
variant=log_converter.Variants.TO_DATA_FRAME,
99+
parameters=parameters,
100+
)
92101

93102
activity_key = exec_utils.get_param_value(
94103
Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY
@@ -128,7 +137,7 @@ def apply(
128137
# configure parameters
129138
if population_size < 2:
130139
raise ValueError("population_size < 2: You need at least two parents for each next generation, thus at least a population size of 2.")
131-
if elitism_min_sample > population_size:
140+
if elitism_min_sample >= population_size:
132141
elitism_min_sample = population_size - 1
133142
if elitism_min_sample < 1:
134143
raise ValueError("elitism_min_sample < 1: No empty samples allowed.")
@@ -143,11 +152,25 @@ def apply(
143152
if tournament_timeout < 1:
144153
tournament_timeout = 1
145154
history = []
155+
best_individual = None
156+
best_fitness = -1.0
146157
population = individuals(log, population_size, T, { # [(I,O), …]
147158
"activity_key":activity_key, "timestamp_key":timestamp_key, "case_id_key":case_id_key
148159
})
149160
for _ in tqdm(range(generations), "Genetic generations"):
150-
population, fitness = tournament(tqdm(population, f"└─Tournament {len(history)}"), log, T, sort=True, timeout=tournament_timeout)
161+
population, fitness = tournament(
162+
tqdm(population, f"└─Tournament {len(history)}"),
163+
log,
164+
T,
165+
sort=True,
166+
timeout=tournament_timeout,
167+
activity_key=activity_key,
168+
timestamp_key=timestamp_key,
169+
case_id_key=case_id_key,
170+
)
171+
if fitness[0] > best_fitness:
172+
best_fitness = fitness[0]
173+
best_individual = copy.deepcopy(population[0])
151174
if log_csv:
152175
log_csv.writerow([datetime.now(), len(history)] + fitness)
153176
if fitness[0] == 1 or (history and all(f == fitness[0] for f in history[-int(generations/2):])):
@@ -167,27 +190,43 @@ def apply(
167190
offspring = mutate(offspring, mutation_rate)
168191
next_population.append(offspring)
169192
population = next_population
170-
return matrix2petrinet(GeneticMatrix(*population[0], T))
193+
if best_individual is None:
194+
population, fitness = tournament(
195+
population,
196+
log,
197+
T,
198+
sort=True,
199+
timeout=tournament_timeout,
200+
activity_key=activity_key,
201+
timestamp_key=timestamp_key,
202+
case_id_key=case_id_key,
203+
)
204+
best_individual = copy.deepcopy(population[0])
205+
return matrix2petrinet(GeneticMatrix(*best_individual, T))
171206

172207
def individuals(log: Union[DataFrame, EventLog], sample_size=1, T=None, keys: dict[str,str] = {"activity_key":xes.DEFAULT_NAME_KEY, "timestamp_key":xes.DEFAULT_TIMESTAMP_KEY, "case_id_key":constants.CASE_CONCEPT_NAME}) -> list[Individual]:
173208
if not T:
174209
T = tuple(log[keys['activity_key']].unique())
210+
T_idx = {activity: idx for idx, activity in enumerate(T)}
175211
# @src 6.1. Initial Population; https://doi.org/10.1007/11494744_5
176212
# create matrix C
177213
C = numpy.zeros((len(T), len(T)))
178214
for _,group in tqdm(log.sort_values(keys['timestamp_key'], ascending=True).groupby(keys['case_id_key']), desc="Find consecutive activities"):
179-
for row1,row2 in itertools.pairwise(map(lambda r: r[1], group.iterrows())):
180-
i = T.index(row1[keys['activity_key']])
181-
o = T.index(row2[keys['activity_key']])
182-
C[i,o] += 1
183-
Cn = C / C.sum(axis=1)[:,None] # normalise row-wise
215+
prev_activity = None
216+
for _, row in group.iterrows():
217+
activity = row[keys['activity_key']]
218+
if prev_activity is not None:
219+
C[T_idx[prev_activity], T_idx[activity]] += 1
220+
prev_activity = activity
221+
row_sums = C.sum(axis=1, keepdims=True)
222+
Cn = numpy.divide(C, row_sums, out=numpy.zeros_like(C, dtype=float), where=row_sums != 0)
184223
samples = []
185-
for sample in tqdm(range(sample_size), "Initial population"):
224+
for _ in tqdm(range(sample_size), "Initial population"):
186225
I,O = defaultdict(list), defaultdict(list)
187226
for i,o in numpy.ndindex(C.shape):
188227
if random.random() < Cn[i,o]: # [0,1[ < [0,1]; 0 < 0 = false
189-
I[T[i]].append(T[o])
190-
O[T[o]].append(T[i])
228+
O[T[i]].append(T[o])
229+
I[T[o]].append(T[i])
191230
I,O = repair(I, O, C, T)
192231
# partitioning already ensures no T in >1 partitions
193232
# s. 4. Causal Matrix, Def. 4; https://doi.org/10.1007/11494744_5
@@ -198,7 +237,17 @@ def individuals(log: Union[DataFrame, EventLog], sample_size=1, T=None, keys: di
198237
samples.append((I,O))
199238
return samples
200239

201-
def tournament(population: list[Individual], log: Union[DataFrame, EventLog], T, sort=True, timeout=1) -> tuple[list[Individual],list[float]]:
240+
def tournament(
241+
population: list[Individual],
242+
log: Union[DataFrame, EventLog],
243+
T,
244+
sort=True,
245+
timeout=1,
246+
*,
247+
activity_key: str = xes.DEFAULT_NAME_KEY,
248+
timestamp_key: str = xes.DEFAULT_TIMESTAMP_KEY,
249+
case_id_key: str = constants.CASE_CONCEPT_NAME,
250+
) -> tuple[list[Individual],list[float]]:
202251
"""sort=True: sort descending by fitness (i.e. best first)"""
203252
# @src 6.2. Fitness Calculation; https://doi.org/10.1007/11494744_5
204253
fitness = []
@@ -208,7 +257,12 @@ def tournament(population: list[Individual], log: Union[DataFrame, EventLog], T,
208257
metrics = func_timeout(
209258
timeout = timeout,
210259
func = pm4py.fitness_token_based_replay,
211-
args = (log, *model)
260+
args = (log, *model),
261+
kwargs = {
262+
"activity_key": activity_key,
263+
"timestamp_key": timestamp_key,
264+
"case_id_key": case_id_key,
265+
},
212266
)
213267
except FunctionTimedOut:
214268
print("\tTimeout for individual", i)
@@ -262,63 +316,72 @@ def crossover(parent1: Individual, parent2: Individual, T: list[str]) -> tuple[I
262316
I2,O2 = offspring2 = copy.deepcopy(parent2)
263317
# 3. swap and recombine
264318
if I1[t] and I2[t]:
319+
old_I1 = iset.flat(I1[t])
320+
old_I2 = iset.flat(I2[t])
265321
swap_point = random.randrange(min(len(I1[t]), len(I2[t])))
266322
toI1, toI2 = I2[t][swap_point:], I1[t][swap_point:]
267323
# no T can exist twice in I/O[t], s. Def. 4; https://doi.org/10.1007/11494744_5
268324
# COPY of I_i, else not properly removed in opposite I_j
269-
I1_flat = iset.flat(I1[t][:swap_point-1])
270-
toI1_dedup = [ iset(S-I1_flat) for S in toI1 ]
271-
I2_flat = iset.flat(I2[t][:swap_point-1])
272-
toI2_dedup = [ iset(S-I2_flat) for S in toI2 ]
325+
I1_flat = iset.flat(I1[t][:swap_point])
326+
toI1_dedup = [iset(S - I1_flat) for S in toI1 if S - I1_flat]
327+
I2_flat = iset.flat(I2[t][:swap_point])
328+
toI2_dedup = [iset(S - I2_flat) for S in toI2 if S - I2_flat]
273329
# merge
274-
I1[t], I2[t] = I1[t][:swap_point-1] + toI1_dedup, I2[t][:swap_point-1] + toI2_dedup
275-
# @src 6.3. Genetic Operations: Update Related Elements; https://doi.org/10.1007/11494744_5
276-
for c in iset.flat(toI1) - iset.flat(toI2): # no reassign staying T
277-
O1[c].append(iset({t}))
278-
for i,p in enumerate(O2[c]): # p = only local var
279-
if t in p:
280-
O2[c][i] = iset(p - {t})
281-
if not O2[c][i]:
282-
O2[c].remove(O2[c][i])
283-
break
284-
for c in iset.flat(toI2) - iset.flat(toI1): # no reassign staying T
285-
O2[c].append(iset({t}))
286-
for i,p in enumerate(O1[c]): # p = only local var
287-
if t in p:
288-
O1[c][i] = iset(p - {t})
289-
if not O1[c][i]:
290-
O1[c].remove(O1[c][i])
291-
break
330+
I1[t], I2[t] = I1[t][:swap_point] + toI1_dedup, I2[t][:swap_point] + toI2_dedup
331+
new_I1 = iset.flat(I1[t])
332+
new_I2 = iset.flat(I2[t])
333+
for c in new_I1 - old_I1:
334+
_add_singleton_partition(O1, c, t)
335+
for c in old_I1 - new_I1:
336+
_remove_value_from_partitions(O1, c, t)
337+
for c in new_I2 - old_I2:
338+
_add_singleton_partition(O2, c, t)
339+
for c in old_I2 - new_I2:
340+
_remove_value_from_partitions(O2, c, t)
292341
if O1[t] and O2[t]:
342+
old_O1 = iset.flat(O1[t])
343+
old_O2 = iset.flat(O2[t])
293344
swap_point = random.randrange(min(len(O1[t]), len(O2[t])))
294345
toO1, toO2 = O2[t][swap_point:], O1[t][swap_point:]
295346
# no T can exist twice in I/O[t], s. Def. 4; https://doi.org/10.1007/11494744_5
296347
# COPY of O_i, else not properly removed in opposite O_j
297-
O1_flat = iset.flat(O1[t][:swap_point-1])
298-
toO1_dedup = [ iset(S-O1_flat) for S in toO1 ]
299-
O2_flat = iset.flat(O2[t][:swap_point-1])
300-
toO2_dedup = [ iset(S-O2_flat) for S in toO2 ]
348+
O1_flat = iset.flat(O1[t][:swap_point])
349+
toO1_dedup = [iset(S - O1_flat) for S in toO1 if S - O1_flat]
350+
O2_flat = iset.flat(O2[t][:swap_point])
351+
toO2_dedup = [iset(S - O2_flat) for S in toO2 if S - O2_flat]
301352
# merge
302-
O1[t], O2[t] = O1[t][:swap_point-1] + toO1_dedup, O2[t][:swap_point-1] + toO2_dedup
303-
# @src 6.3. Genetic Operations: Update Related Elements; https://doi.org/10.1007/11494744_5
304-
for c in iset.flat(toO1) - iset.flat(toO2):
305-
I1[c].append(iset({t}))
306-
for i,p in enumerate(I2[c]): # p = only local var
307-
if t in p:
308-
I2[c][i] = iset(p - {t})
309-
if not I2[c][i]:
310-
I2[c].remove(I2[c][i])
311-
break
312-
for c in iset.flat(toO2) - iset.flat(toO1):
313-
I2[c].append(iset({t}))
314-
for i,p in enumerate(I1[c]): # p = only local var
315-
if t in p:
316-
I1[c][i] = iset(p - {t})
317-
if not I1[c][i]:
318-
I1[c].remove(I1[c][i])
319-
break
353+
O1[t], O2[t] = O1[t][:swap_point] + toO1_dedup, O2[t][:swap_point] + toO2_dedup
354+
new_O1 = iset.flat(O1[t])
355+
new_O2 = iset.flat(O2[t])
356+
for c in new_O1 - old_O1:
357+
_add_singleton_partition(I1, c, t)
358+
for c in old_O1 - new_O1:
359+
_remove_value_from_partitions(I1, c, t)
360+
for c in new_O2 - old_O2:
361+
_add_singleton_partition(I2, c, t)
362+
for c in old_O2 - new_O2:
363+
_remove_value_from_partitions(I2, c, t)
320364
return (offspring1, offspring2)
321365

366+
367+
def _add_singleton_partition(mapping: Dict[str, list[iset]], key: str, value: str) -> None:
368+
partitions = mapping.setdefault(key, [])
369+
if any(value in partition for partition in partitions):
370+
return
371+
partitions.append(iset({value}))
372+
373+
374+
def _remove_value_from_partitions(mapping: Dict[str, list[iset]], key: str, value: str) -> None:
375+
partitions = mapping.setdefault(key, [])
376+
for index, partition in enumerate(partitions):
377+
if value in partition:
378+
updated = iset(partition - {value})
379+
if updated:
380+
partitions[index] = updated
381+
else:
382+
del partitions[index]
383+
return
384+
322385
def mutate(individual: Individual, rate: float = 0.01) -> Individual:
323386
# @src 6.3. Genetic Operations: Mutation; https://doi.org/10.1007/11494744_5
324387
I,O = individual
@@ -348,18 +411,20 @@ def repair(I: list[str], O: list[str], C: numpy.ndarray, T: list[str]) -> tuple[
348411
partition.add(t)
349412
Tn |= (set(I[t]) | set(O[t])) & left
350413
partitions.append(partition)
414+
T_idx = {activity: idx for idx, activity in enumerate(T)}
415+
351416
def rand_connect_one(I: InputMap, O: OutputMap, Ti: list[set], To: list[set], C: numpy.ndarray) -> Individual:
352417
comb = tuple(itertools.product(Ti, To))
353418
try:
354419
ti,to = random.choices(
355420
comb,
356-
weights = [ C[T.index(ti),T.index(to)] for ti,to in comb ],
421+
weights = [ C[T_idx[ti], T_idx[to]] for ti,to in comb ],
357422
k=1
358423
)[0]
359424
except ValueError:
360425
ti,to = random.choice(comb)
361-
I[ti].append(to)
362-
O[to].append(ti)
426+
O[ti].append(to)
427+
I[to].append(ti)
363428
return I,O
364429
while len(partitions) > 1:
365430
random.shuffle(partitions)

pm4py/objects/conversion/genetic_matrix/variants/to_petri_net.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,11 @@ def is_transitively_not_sophisticated(Tio, places):
9999
check.append(Tio2)
100100
# build PetriNet
101101
io = [PetriNet.Place('i'), PetriNet.Place('o')]
102-
net = PetriNet(
103-
places = [PetriNet.Place(p) for p in map(str, X)] + io,
104-
transitions = [PetriNet.Transition(t) for t in T]
105-
)
102+
net = PetriNet()
103+
net.places.update(PetriNet.Place(p) for p in map(str, X))
104+
net.places.update(io)
105+
# Non-silent transitions must have a label; label=None is rendered as invisible.
106+
net.transitions.update(PetriNet.Transition(t, t) for t in T)
106107
# original: F = {(i,t) | t∈T ∧ •t=∅} ∪ {(t,o) | t∈T ∧ t•=∅} ∪ …
107108
T_noI, T_noO = get_src_sink_sets_for_wfnet(I, O, T)
108109
arcs_pt = [ ('i', t) for t in T_noI ] + [
@@ -130,7 +131,7 @@ def is_transitively_not_sophisticated(Tio, places):
130131
Ps_i = { name : PetriNet.Place("i"+str(name)) # input of next non-silent T
131132
for name in [ (t,s) for t in To for s in I[t] ]
132133
}
133-
net.places.extend(itertools.chain(Ps_o.values(), Ps_i.values()))
134+
net.places.update(itertools.chain(Ps_o.values(), Ps_i.values()))
134135
Ts = defaultdict(lambda: PetriNet.Transition(name="")) # name="" → silent
135136
# add arcs
136137
for (t1,So),p in Ps_o.items():
@@ -141,7 +142,7 @@ def is_transitively_not_sophisticated(Tio, places):
141142
petri_utils.add_arc_from_to(p, getTransition(t2), net)
142143
for t1 in Si: # connect silent T
143144
petri_utils.add_arc_from_to(Ts[t1,t2], p, net)
144-
net.transitions.extend(Ts.values())
145+
net.transitions.update(Ts.values())
145146
# add markings
146147
markings = [Marking() for _ in io]
147148
for m,p in zip(markings, io):

0 commit comments

Comments
 (0)