diff --git a/evolve_text.py b/evolve_text.py
index cb29dc8..c3df904 100644
--- a/evolve_text.py
+++ b/evolve_text.py
@@ -18,6 +18,7 @@ from deap import algorithms
 from deap import base
 from deap import tools
 
+from levenshtein import levenshtein
 
 
 # -----------------------------------------------------------------------------
@@ -92,9 +93,22 @@ def get_text(self):
 # Genetic operators
 # -----------------------------------------------------------------------------
 
-# TODO: Implement levenshtein_distance function (see Day 9 in-class exercises)
+# Implement levenshtein_distance function (see Day 9 in-class exercises)
 # HINT: Now would be a great time to implement memoization if you haven't
 
+def levenshtein_distance(message, goal_text):
+    """
+    Given a Message (or its plain text) and a goal_text string, return the
+    Levenshtein distance between the message text and the goal_text.
+
+    The memoized helper in levenshtein.py uses its arguments as dictionary
+    keys, so a list-based Message (normally unhashable) is converted to its
+    string form before the call; plain strings pass through unchanged.
+    """
+    text = message.get_text() if hasattr(message, "get_text") else message
+    return levenshtein(text, goal_text)
+
+
 def evaluate_text(message, goal_text, verbose=VERBOSE):
     """
     Given a Message and a goal_text string, return the Levenshtein distance
@@ -107,6 +121,25 @@ def evaluate_text(message, goal_text, verbose=VERBOSE):
     return (distance, )     # Length 1 tuple, required by DEAP
 
 
+def twoPointCX(message1, message2):
+    """
+    Perform an in-place two-point crossover on two Messages.
+
+    Two cut points are drawn uniformly from 0 up to the length of the
+    shorter parent, and the characters between them are swapped.  Returns
+    the two (modified) parents as a tuple, as DEAP mating operators do.
+    """
+    limit = min(len(message1), len(message2))
+    # Sorting the cut points removes the need for mirrored swap branches.
+    start, stop = sorted((random.randint(0, limit),
+                          random.randint(0, limit)))
+
+    message1[start:stop], message2[start:stop] = (
+        message2[start:stop], message1[start:stop])
+
+    return message1, message2
+
+
 def mutate_text(message, prob_ins=0.05, prob_del=0.05, prob_sub=0.05):
     """
     Given a Message and independent probabilities for each mutation type,
@@ -121,13 +154,18 @@ def mutate_text(message, prob_ins=0.05, prob_del=0.05, prob_sub=0.05):
     """
 
     if random.random() < prob_ins:
-        # TODO: Implement insertion-type mutation
-        pass
+        # randint is inclusive, so len(message) also allows appending at the
+        # end and keeps an empty Message from raising ValueError.
+        loc = random.randint(0, len(message))
+        message.insert(loc, random.choice(VALID_CHARS))
 
-    # TODO: Also implement deletion and substitution mutations
-    # HINT: Message objects inherit from list, so they also inherit
-    #       useful list methods
-    # HINT: You probably want to use the VALID_CHARS global variable
+    # Deletion and substitution need at least one character to act on.
+    if message and random.random() < prob_del:
+        del message[random.randrange(len(message))]
+
+    if message and random.random() < prob_sub:
+        loc = random.randrange(len(message))
+        message[loc] = random.choice(VALID_CHARS)
 
     return (message, )   # Length 1 tuple, required by DEAP
 
@@ -149,11 +187,11 @@ def get_toolbox(text):
 
     # Genetic operators
     toolbox.register("evaluate", evaluate_text, goal_text=text)
-    toolbox.register("mate", tools.cxTwoPoint)
+    toolbox.register("mate", twoPointCX)
     toolbox.register("mutate", mutate_text)
     toolbox.register("select", tools.selTournament, tournsize=3)
 
-    # NOTE: You can also pass function arguments as you define aliases, e.g.
+    # You can also pass function arguments as you define aliases, e.g.
     #   toolbox.register("individual", Message, max_length=200)
     #   toolbox.register("mutate", mutate_text, prob_sub=0.18)
 
@@ -183,9 +221,9 @@ def evolve_string(text):
     # (See: http://deap.gel.ulaval.ca/doc/dev/api/algo.html for details)
     pop, log = algorithms.eaSimple(pop,
                                    toolbox,
-                                   cxpb=0.5,    # Prob. of crossover (mating)
+                                   cxpb=0.9,    # Prob. of crossover (mating)
                                    mutpb=0.2,   # Probability of mutation
-                                   ngen=500,    # Num. of generations to run
+                                   ngen=2000,   # Num. of generations to run
                                    stats=stats)
 
     return pop, log
diff --git a/levenshtein.py b/levenshtein.py
new file mode 100644
index 0000000..bb1c375
--- /dev/null
+++ b/levenshtein.py
@@ -0,0 +1,35 @@
+"""Memoized recursive Levenshtein (edit) distance."""
+
+# Results keyed by (s1, s2); shared across calls so repeated prefixes are
+# only ever solved once.  Note: it is never pruned, so it grows with use.
+cache = {}
+
+
+def levenshtein(s1, s2):
+    """
+    Return the Levenshtein distance between s1 and s2.
+
+    Both arguments are used as dictionary keys, so they must be hashable
+    sequences such as str.  The recursion goes one level deeper per
+    character dropped, so very long inputs can hit Python's recursion limit.
+    """
+    key = (s1, s2)
+    if key in cache:
+        return cache[key]
+
+    # Turning an empty sequence into the other costs one insert per item.
+    if not s1:
+        return len(s2)
+    if not s2:
+        return len(s1)
+
+    cost = 0 if s1[-1] == s2[-1] else 1
+    res = min(levenshtein(s1[:-1], s2) + 1,          # delete from s1
+              levenshtein(s1, s2[:-1]) + 1,          # insert into s1
+              levenshtein(s1[:-1], s2[:-1]) + cost)  # match / substitute
+    cache[key] = res
+    return res
+
+
+if __name__ == '__main__':
+    print(levenshtein('cola', 'coca'))
diff --git a/results.txt b/results.txt
new file mode 100644
index 0000000..ddadffe
--- /dev/null
+++ b/results.txt
@@ -0,0 +1,4 @@
+I think the concept is super interesting, and am really curious how
+this could be paired with Biomimicry. My results showed that it took
+several thousand generations to reach the final text.
+Will describe more in-depth, need to sleep now.