Coursera_Introduction_to_Scripting_in_Python/Processing_Cancer_Risk_Data.py at master · xiaomei7/Coursera_Introduction_to_Scripting_in_Python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
Week 4 practice project solution for Python Data Analysis
Processing 2D tables
"""

import csv


#########################################################
# Part 1 - Week 3


def print_table(table):
    """
    Print every row of a nested list on its own console line
    """
    # print(row) would call str(row) itself; converting up front is equivalent
    for rendered_row in map(str, table):
        print(rendered_row)


def read_csv_file(file_name):
    """
    Load a comma-separated CSV file into a nested list
    Input: String naming the comma-separated CSV file to read
    Output: List of lists, one inner list of fields per record in the file
    """

    # The with-block closes the file once every record has been collected
    with open(file_name, newline='') as source:
        return list(csv.reader(source, delimiter=','))


def write_csv_file(csv_table, file_name):
    """
    Input: Nested list csv_table and a string file_name
    Action: Save the rows of csv_table, in order, to a comma-separated CSV file
    called file_name (the file is opened in 'w' mode, so it is overwritten)
    """

    with open(file_name, 'w', newline='') as sink:
        # QUOTE_MINIMAL only quotes fields that contain special characters
        table_writer = csv.writer(sink, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        table_writer.writerows(csv_table)


#########################################################
# Part 2 - Week 4


def select_columns(my_table, col_indices):
    """
    Input: Nested list my_table and a list of integers col_indices
    Output: New nested list holding, for every row of my_table, the entries
    at the positions in col_indices (in the order given); my_table itself
    is left unchanged
    """
    return [[row[idx] for idx in col_indices] for row in my_table]


def sort_by_column(my_table, col_idx):
    """
    Input: Nested list my_table and an integer col_idx
    Action: Reorder the rows of my_table in place so that the entries in
    column col_idx, converted with float(), run from largest to smallest
    """
    def numeric_key(row):
        # Entries are converted so they compare as numbers, not as text
        return float(row[col_idx])

    my_table.sort(reverse=True, key=numeric_key)


def test_part2_code():
    """
    Run examples that test the functions for part 2

    Reads "test_case.csv", "cancer_risk05_v4_county.csv" and
    "cancer_risk_trimmed_solution.csv" by relative path, writes
    "cancer_risk_trimmed.csv", and prints every mismatch found between the
    computed table and the reference solution (nothing is printed by the
    final comparison when the two tables agree)
    """

    # Load a simple example table
    test_table = read_csv_file("test_case.csv")  # file is available at ...
    print_table(test_table)
    print()

    # Simple test for column trimming function
    print_table(select_columns(test_table, [0, 2]))
    print()

    # Simple test for column sorting function
    sort_by_column(test_table, 3)
    print_table(test_table)
    print()

    # Read cancer-risk data set, select columns A, B, C, E, and L, then sort by column E in descending order
    cancer_risk_table = read_csv_file("cancer_risk05_v4_county.csv")
    col_indices = [0, 1, 2, 4, 11]
    trimmed_risk_table = select_columns(cancer_risk_table, col_indices)
    sort_by_column(trimmed_risk_table, 4)
    write_csv_file(trimmed_risk_table, "cancer_risk_trimmed.csv")

    # Load our file "cancer_risk_trimmed_solution.csv" and compare with your solution
    trimmed_risk_solution = read_csv_file("cancer_risk_trimmed_solution.csv")

    # Report a size mismatch explicitly: indexing the solution by the computed
    # table's dimensions would raise IndexError when the solution is smaller
    # and would silently ignore extra rows when it is larger
    if len(trimmed_risk_table) != len(trimmed_risk_solution):
        print("Row count differs:", len(trimmed_risk_table), len(trimmed_risk_solution))

    # zip() stops at the shorter table/row, so only cells present in both are compared
    for row, (our_row, solution_row) in enumerate(zip(trimmed_risk_table, trimmed_risk_solution)):
        if len(our_row) != len(solution_row):
            print("Column count differs at row", row, len(our_row), len(solution_row))
        for col, (our_value, solution_value) in enumerate(zip(our_row, solution_row)):
            if our_value != solution_value:
                print("Difference at", row, col, our_value, solution_value)


# Runs the part 2 checks whenever this file is executed or imported; the CSV
# files named inside test_part2_code() are opened by relative path
test_part2_code()

#Output from test_part2_code()
##['1', '2', '3', '4']
##['5', '6', '7', '8']
##['-2', '-3', '-4', '-5']
##
##['1', '3']
##['5', '7']
##['-2', '-4']
##
##['5', '6', '7', '8']
##['1', '2', '3', '4']
##['-2', '-3', '-4', '-5']