# Source: python-programming/decrypt.py (OColkesen/python-programming on GitHub, master branch)
"""
This program decrypts the Caesar Cipher from encrypted files with any shift of the alphabet.

Authors: Oguzhan Colkesen, Andrew Sherwood
Time Spent: 3.5 hrs
"""
# Number of letters in the alphabet; also the number of candidate shifts tried.
ALPHABET_LENGTH = 26
# Lowercase letters in order: a letter's list index is the position that gets shifted.
alphabet = list('abcdefghijklmnopqrstuvwxyz')

def crack_cipher(filename):
    """
    Decrypts a Caesar-ciphered file without knowing the shift in advance.

    Every shift from 0 to ALPHABET_LENGTH - 1 is scored with shift_characters(),
    and the text is then rebuilt with create_final_string() using the shift that
    received the lowest score.

    Parameters:
        filename - the name of the file that contains the encrypted text. It is
                   handed unchanged to the helper functions, which open it.

    Returns:
        A string, which is the text of the file shifted by the best-scoring
        amount, i.e. the decrypted message.
    """

    # Starting bound for the comparison below: the score is a sum of one
    # percentage difference per letter, so it cannot exceed 100 per letter.
    lowest_score = 100 * ALPHABET_LENGTH
    best_shift = 0

    for shift in range(ALPHABET_LENGTH):
        candidate_score = shift_characters(shift, filename)
        if candidate_score <= lowest_score:
            # "<=" means that when two shifts tie, the later one is kept.
            lowest_score = candidate_score
            best_shift = shift

    return create_final_string(best_shift, filename)

def create_final_string(cracking_key, filename):
    """
    Reads the file and shifts every lowercase letter back by cracking_key
    positions, wrapping around the alphabet. All other characters (uppercase
    letters, digits, punctuation, whitespace) are copied through unchanged.

    Parameters:
        cracking_key - the number of positions each lowercase letter is moved
                       back in the alphabet
        filename - the name of the file that contains the encrypted text

    Returns:
        A string with the same length and layout as the file's text, in which
        each lowercase letter has been replaced by its shifted counterpart.
    """

    with open(filename, "r") as encrypted_file:
        encrypted_text = encrypted_file.read()

    base = ord('a')

    def unshift(symbol):
        # Characters outside 'a'..'z' are not part of the cipher; keep them.
        if not ('a' <= symbol <= 'z'):
            return symbol
        # Position within the alphabet, moved back by the key, wrapped to 0..25.
        return chr((ord(symbol) - base - cracking_key) % 26 + base)

    return "".join(unshift(symbol) for symbol in encrypted_text)


def shift_characters(n, filename):
    """
    Scores one candidate shift. The file is read, only its lowercase letters
    are kept, each of them is moved back by n positions in the alphabet
    (wrapping around), and the resulting letters are passed to
    generate_score().

    Parameters:
        n - the candidate number of positions to shift each letter back
        filename - the name of the file that contains the encrypted text

    Returns:
        The value returned by generate_score() for the shifted letters.
        A lower score implies a better shift.
    """

    with open(filename, "r") as encrypted_file:
        encrypted_text = encrypted_file.read()

    # Only lowercase letters take part in the frequency analysis.
    lowercase_only = [symbol for symbol in encrypted_text if 'a' <= symbol <= 'z']

    decoded = []
    for symbol in lowercase_only:
        for position, candidate in enumerate(alphabet):
            if candidate != symbol:
                continue
            # position is the letter's index; step back n places with wrap-around.
            decoded.append(alphabet[(position - n) % 26])

    return generate_score(decoded, alphabet)


def generate_score(letters, alphabet):
    """
    Scores how closely the letter frequencies of `letters` match typical
    English letter frequencies.

    The occurrences of each letter are counted and converted to percentages
    of len(letters). The score is the sum, over the 26 letters, of the absolute
    difference between that percentage and the English percentage.

    Parameters:
        letters - a list of letters that will be used to create the frequencies
                  and determine the score. Entries not found in `alphabet` are
                  not counted, but they still contribute to len(letters).
        alphabet - list of letters in the alphabet; only letters found in it
                   are counted

    Returns:
        A float variable, which is the sum of the differences of frequencies
        between letters in English and the letters list. Lower score means a
        closer match. For an empty letters list every observed frequency is
        taken as 0, so the score is the sum of the English frequencies
        (about 100) instead of a ZeroDivisionError.
    """

    ENGLISH_FREQ = {'a': 8.17, 'b': 1.49, 'c': 2.78, 'd': 4.25, 'e': 12.70,
                    'f': 2.23, 'g': 2.02, 'h': 6.09, 'i': 6.97, 'j': 0.15,
                    'k': 0.77, 'l': 4.03, 'm': 2.41, 'n': 6.75, 'o': 7.51,
                    'p': 1.93, 'q': 0.09, 'r': 5.99, 's': 6.33, 't': 9.06,
                    'u': 2.76, 'v': 0.98, 'w': 2.36, 'x': 0.15, 'y': 1.97,
                    'z': 0.06}

    # One counter per letter, starting at zero.
    letter_count = dict.fromkeys(ENGLISH_FREQ, 0)

    for letter in letters:
        if letter in alphabet:
            letter_count[letter] += 1

    total = len(letters)
    score = 0

    for letter, expected in ENGLISH_FREQ.items():
        # With no letters at all there is nothing to divide by; treat every
        # observed frequency as 0 percent rather than crashing.
        observed = 100 * (letter_count[letter] / total) if total else 0
        score += abs(observed - expected)

    return score