-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtrain.py
More file actions
137 lines (118 loc) Β· 4.08 KB
/
train.py
File metadata and controls
137 lines (118 loc) Β· 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# **************************************************************************** #
# #
# ::: :::::::: #
# train.py :+: :+: :+: #
# +:+ +:+ +:+ #
# By: obelouch <obelouch@student.42.fr> +#+ +:+ +#+ #
# +#+#+#+#+#+ +#+ #
# Created: 2020/12/13 03:23:06 by obelouch #+# #+# #
# Updated: 2020/12/14 02:08:00 by obelouch ### ########.fr #
# #
# **************************************************************************** #
from src.algos import least_square, bgd, sgd
from src.precision import print_precision
from src.csv_tools import df_fromCSV
from src.params import write_params
import src.ftMath as myMath
import pandas as pd
import numpy as np
import sys
import re
# Error Macros:
ERROR_FLAG = 1  # invalid command-line flag syntax
ERROR_ARGC = 2  # wrong number of command-line arguments
# Algorithm Variables:
algo = 'BGD'  # default training algorithm: Batch Gradient Descent
alpha = 0.01  # default learning rate (must stay < 1 per usage text)
def set_ALGOandLR(res_algo, res_alpha):
    '''
    Set the global algorithm name and learning rate from the regex
    matches extracted from the command-line flag.

    res_algo:  re.Match on the algorithm part ('BGD', 'SGD' or 'LS'), or None
    res_alpha: re.Match on the learning-rate part of the flag, or None
    '''
    global algo
    global alpha
    if res_algo:
        algo = res_algo.group()
    if res_alpha:
        alpha = float(res_alpha.group())
        # Usage states "alpha: learning rate < 1", so alpha == 1 must be
        # rejected too (original used '> 1', letting exactly 1 through).
        if alpha >= 1:
            # Fixed typo in the user-facing message ("Discent" -> "Descent").
            print('The Gradient Descent Diverge with this learning rate value!')
            print('Setting the default value: 0.01')
            alpha = 0.01
def exit_usage(error):
    '''
    Print the error message matching the given error code, then the
    program usage, and terminate the process with exit status 1.

    error: ERROR_ARGC (bad argument count) or ERROR_FLAG (bad flag syntax)
    '''
    if error == ERROR_ARGC:
        print('Error: Wrong number of arguments!')
    if error == ERROR_FLAG:
        print('Error: Wrong flag!')
    print('\nUsage: python3 train.py [-BGD | -SGD | -LS][{< alpha >}]')
    print(' SGD: Stochastic Gradient Descent')
    print(' BGD: Batch Gradient Descent')
    print(' LS: Least Squares')
    print(' alpha: learning rate < 1')
    # sys.exit instead of the site-module 'exit' helper, which is not
    # guaranteed to exist in every interpreter context.
    sys.exit(1)
def pick_algo():
    '''
    Validate the command-line flag and configure the training
    algorithm and learning rate accordingly.
    '''
    # At most one optional flag is accepted after the program name.
    if len(sys.argv) > 2:
        exit_usage(ERROR_ARGC)
    flag = sys.argv[1]
    # The flag must look like -ALGO or -ALGO{alpha}.
    if re.match(r'^-(BGD|SGD|LS)(\{[0-9]+(\.[0-9]+)?\})?$', flag) is None:
        exit_usage(ERROR_FLAG)
    # Extract the algorithm name and the optional learning rate,
    # then store them in the module-level settings.
    algo_part = re.search(r'[A-Z]+', flag)
    alpha_part = re.search(r'[0-9]+(\.[0-9]+)?', flag)
    set_ALGOandLR(algo_part, alpha_part)
def get_theta(norm_X, norm_Y):
    '''
    Run the selected algorithm on the normalized data and return
    the fitted (theta0, theta1); defaults to Batch Gradient Descent.
    '''
    # Dispatch on the globally selected algorithm name.
    runners = {
        'SGD': lambda: sgd(norm_X, norm_Y, alpha),
        'LS': lambda: least_square(norm_X, norm_Y),
    }
    chosen = runners.get(algo, lambda: bgd(norm_X, norm_Y, alpha))
    return chosen()
def print_loading():
    '''
    Print the training banner naming the selected algorithm.
    '''
    # Map the algorithm code to its display name; anything else
    # falls back to Batch Gradient Descent (the default).
    labels = {
        'SGD': 'Stochastic Gradient Descent',
        'LS': 'Least Squares',
    }
    label = labels.get(algo, 'Batch Gradient Descent')
    print(f'\nTraining using {label} Algorithm ....\n')
def train_model():
    '''
    Train the univariable linear regression model on the dataset:
    read Data/data.csv, normalize km/price by their maxima, fit theta
    with the selected algorithm, denormalize theta, persist it with
    write_params and print the model precision.
    '''
    # An optional CLI flag selects the algorithm / learning rate.
    if len(sys.argv) > 1:
        pick_algo()
    # Read Dataset CSV File
    df = df_fromCSV('Data/data.csv')
    Y = np.array(df['price'])
    X = np.array(df['km'])
    # Normalize X and Y into [0, 1] by dividing by their maxima.
    maxKm = myMath.ft_max(X)
    maxPrice = myMath.ft_max(Y)
    # Vectorized numpy division replaces the original per-element
    # lambda comprehensions ('devide_x'/'devide_y') — same values.
    norm_X = X / maxKm
    norm_Y = Y / maxPrice
    # Apply Algorithm
    print_loading()
    theta = get_theta(norm_X, norm_Y)
    # Adapt theta to the denormalization:
    # price = t0*maxPrice + (t1*maxPrice/maxKm) * km
    theta[1] = theta[1] * maxPrice / maxKm
    theta[0] = theta[0] * maxPrice
    # Write Thetas in params file
    write_params(theta)
    # Restored the mojibake character in the original message as a
    # checkmark (U+2705).
    print('Training DONE \u2705\n')
    print_precision(theta, X, Y)
# Launch the program only when executed as a script, so importing
# this module (e.g. for testing) does not trigger training.
if __name__ == '__main__':
    train_model()