diff --git a/evolve_text.py b/evolve_text.py index cb29dc8..41024a0 100644 --- a/evolve_text.py +++ b/evolve_text.py @@ -91,9 +91,23 @@ def get_text(self): # ----------------------------------------------------------------------------- # Genetic operators # ----------------------------------------------------------------------------- - -# TODO: Implement levenshtein_distance function (see Day 9 in-class exercises) -# HINT: Now would be a great time to implement memoization if you haven't +def levenshtein_distance(s1, s2): + ''' + finds the levensthein_distance between s1 and s2 + ''' + if len(s1) > len(s2): + s1, s2 = s2, s1 + + distances = range(len(s1) + 1) + for i2, c2 in enumerate(s2): + distances_ = [i2+1] + for i1, c1 in enumerate(s1): + if c1 == c2: + distances_.append(distances[i1]) + else: + distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1]))) + distances = distances_ + return distances[-1] def evaluate_text(message, goal_text, verbose=VERBOSE): """ @@ -119,18 +133,34 @@ def mutate_text(message, prob_ins=0.05, prob_del=0.05, prob_sub=0.05): Substitution: Replace one character of the Message with a random (legal) character """ - if random.random() < prob_ins: - # TODO: Implement insertion-type mutation - pass + index = random.randint(0, len(message)-1) + char = random.choice(VALID_CHARS) + message.insert(index, char) + if random.random() < prob_del: + index = random.randint(0, len(message)-1) + message.pop(index) + if random.random() < prob_sub: + index = random.randint(0, len(message)-1) + char = random.choice(VALID_CHARS) + message[index] = char - # TODO: Also implement deletion and substitution mutations - # HINT: Message objects inherit from list, so they also inherit - # useful list methods - # HINT: You probably want to use the VALID_CHARS global variable return (message, ) # Length 1 tuple, required by DEAP +def mate_text(parent1, parent2): + min_len = min(len(parent1), len(parent2)) + parent1 = list(parent1) + parent2 = 
list(parent2) + i = 0 + while i < min_len: + if random.randint(0,2) == 0: + new_parent_2 = parent1[i] + new_parent_1 = parent2[i] + parent1[i] = new_parent_1 + parent2[i] = new_parent_2 + i+=1 + return(Message("".join(parent1)), Message("".join(parent2))) # ----------------------------------------------------------------------------- # DEAP Toolbox and Algorithm setup @@ -149,7 +179,7 @@ def get_toolbox(text): # Genetic operators toolbox.register("evaluate", evaluate_text, goal_text=text) - toolbox.register("mate", tools.cxTwoPoint) + toolbox.register("mate", mate_text) toolbox.register("mutate", mutate_text) toolbox.register("select", tools.selTournament, tournsize=3) @@ -185,7 +215,7 @@ def evolve_string(text): toolbox, cxpb=0.5, # Prob. of crossover (mating) mutpb=0.2, # Probability of mutation - ngen=500, # Num. of generations to run + ngen=2000, # Num. of generations to run stats=stats) return pop, log @@ -194,6 +224,7 @@ def evolve_string(text): # ----------------------------------------------------------------------------- # Run if called from the command line # ----------------------------------------------------------------------------- + if __name__ == "__main__": # Get goal message from command line (optional) @@ -208,6 +239,7 @@ def evolve_string(text): # Verify that specified goal contains only known valid characters # (otherwise we'll never be able to evolve that string) for char in goal: + if char not in VALID_CHARS: msg = "Given text {goal!r} contains illegal character {char!r}.\n" msg += "Valid set: {val!r}\n" @@ -215,3 +247,4 @@ def evolve_string(text): # Run evolutionary algorithm pop, log = evolve_string(goal) + diff --git a/results.txt b/results.txt new file mode 100644 index 0000000..34d93b4 --- /dev/null +++ b/results.txt @@ -0,0 +1,43 @@ +500 generations,2 initial population +500 2 15 0 15 15 + +500 generations, 10 initial population +500 6 11 0 11 11 + +500 generations, 25 initial population +500 17 7.76 0.58515 7 9 + +500 generations, 60 
initial population +500 37 5.03333 0.179505 5 6 + +500 generations, 100 initial population +500 63 6.04 0.195959 6 7 + +500 generations, 500 initial population +500 284 1.022 0.146683 1 2 + +500 generations, 1000 initial population +500 631 0.029 0.173663 0 2 + +The average distance in the final population decreases as the initial population +increases. This is because the minimum and maximum distances decrease. The algorithm +has more options in choosing the more fit individual when selecting the new generation. + +10 generations, 300 initial population +10 207 11.53 1.12061 9 15 + +100 generations, 300 initial population +100 176 6.22667 0.829833 5 12 + +500 generations, 300 initial population +500 186 2.03 0.205994 2 4 + +1000 generations, 300 initial population +1000 179 1.02 0.14 1 2 + +2000 generations, 300 initial population +2000 180 0.02 0.16207 0 2 + +There is the most improvement in accuracy between 100 and 500 generations. After that the +improvement is smaller. The average distance also decreases as the number of generations +gets higher. diff --git a/toolbox_reflection.md b/toolbox_reflection.md new file mode 100644 index 0000000..54038b9 --- /dev/null +++ b/toolbox_reflection.md @@ -0,0 +1,23 @@ +# Feedback on past toolboxes +In general the whole process of the toolboxes was a jumbled mix. It was communicated early on that toolboxes were going to have to be submitted at a certain date, however I think that a lot of people (me included) didn't understand the use of them for future projects until the deadline came. For example, I did the pickling toolbox at the same time as MP4 rather than before because I suddenly had a use for it. However, I didn't do the Evolutionary Algorithms toolbox until after the deadline because my MP5 needed it. 
+I think that communicating that toolboxes can be and are useful for projects later in the class would motivate people more in spreading the toolboxes out over the semester rather than cramming them into the weekend before. Perhaps having a set due date for all five toolboxes would force people to do that. At the same time, we are college students so we should be able to do that on our own. + +### Word Frequency +I found this toolbox to be a good refresher on file handling and list modifications. As I'd done things like this before I didn't really learn anything new. The instructions on what work was expected to be completed and how to structure the program were very clear. + +### Unittest +This toolbox was very short, and I don't think I learned a lot. I couldn't really figure out why this method of unittesting was better than the one we covered in class. I later found myself wondering how to use unittests in general to test that classes and methods that manipulate objects were working well. +It might be good to have more unittest requirements to complete the toolbox as well as prewritten code that students need to test (as in class objects that need to be tested along with the class methods). + +### Pickling +This was another short one, but I did learn more. Pickling can be quite useful so it was nice to learn about it. It helped my team for MP4. The instructions were fairly clear, and there was a lot of outside suggested documentation provided. + +### Web Apps (Flask) +This was one of my favorite toolboxes. This introduced me to a new way of using Python and how to use a non-JavaScript language to do web development. +A lot of the outside information (Flask, routers, ports, etc.) is explained within the toolbox instructions which is really helpful. The instructions were clear and there was a lot of support provided while still being challenging to implement. 
+ +### AI and Algorithms +Toolboxes are supposed to be relatively short assignments so I understand why a lot of the code was prewritten, but I found myself really wanting to write the code myself. Having an example of how to implement a pathfinding algorithm was helpful and I still learned quite a bit. The assignment forced us to really read and understand what the code was doing so the assignment was structured nicely. + +### Evolutionary Algorithms +I think that explanations of how the code is supposed to fit together would have been useful. I had to scratch my head a bit to figure out why I was getting certain bugs and to realize that I was missing certain things. The concept was really interesting and I definitely learned things; I just wonder if it could have been better structured.