"""
Module for Artificial Neural Network (ANN) Prediction.
"""
import logging

import numpy as np
from sklearn.neural_network import MLPRegressor

from .approximation import Approximation

logger = logging.getLogger(__name__)


class ANN(Approximation):
    """
    Feed-Forward Artificial Neural Network (ANN) built on sklearn's
    MLPRegressor.

    :param list layers: ordered list with the number of neurons of each
        hidden layer.
    :param str activation: activation function for the hidden layers.
        Options: 'identity', 'logistic', 'tanh', 'relu'. Default is 'tanh'.
    :param str solver: the solver for weight optimization. Options: 'lbfgs',
        'sgd', 'adam' (default).
    :param int max_iter: maximum number of iterations. Default is 200.
    :param float learning_rate_init: initial learning rate (only used by the
        'sgd' and 'adam' solvers). Default is 0.001.
    :param float alpha: L2 penalty (regularization term) parameter.
        Default is 0.0001.
    :param int frequency_print: the frequency in terms of epochs to print
        training progress; non-positive values disable printing.
        Default is 10.
    :param dict kwargs: extra keyword arguments forwarded verbatim to
        :class:`sklearn.neural_network.MLPRegressor` (e.g. ``tol``,
        ``random_state``, ``early_stopping``, ``validation_fraction``).

    :Example:
        >>> import ezyrb
        >>> import numpy as np
        >>> x = np.random.uniform(-1, 1, size=(4, 2))
        >>> y = np.array([np.sin(x[:, 0]), np.cos(x[:, 1]**3)]).T
        >>> ann = ezyrb.ANN([10, 5], activation='tanh', max_iter=20000)
        >>> ann.fit(x, y)
        >>> y_pred = ann.predict(x)
        >>> print(y)
        >>> print(y_pred)
        >>> print(len(ann.loss_trend))
        >>> print(ann.loss_trend[-1])
    """

    def __init__(self, layers, activation="tanh", max_iter=200,
                 solver="adam", learning_rate_init=0.001, alpha=0.0001,
                 frequency_print=10, **kwargs):
        logger.debug(
            "Initializing ANN with layers=%s, activation=%s, "
            "solver=%s, max_iter=%d, lr=%f, alpha=%f",
            layers, activation, solver, max_iter, learning_rate_init, alpha)

        self.layers = layers
        self.activation = activation
        self.solver = solver
        self.max_iter = max_iter
        self.learning_rate_init = learning_rate_init
        self.alpha = alpha
        self.frequency_print = frequency_print
        # Any unrecognized options are handed straight to MLPRegressor at
        # fit time, so sklearn-only settings stay reachable.
        self.extra_kwargs = kwargs

        # The regressor is created lazily in fit(); loss_trend mirrors
        # sklearn's loss_curve_ after training.
        self.model = None
        self.loss_trend = []

        logger.info("ANN initialized with sklearn MLPRegressor")

    def fit(self, points, values):
        """
        Build the ANN given 'points' and 'values' and perform training.

        A fresh ``MLPRegressor`` is created on every call, so refitting
        starts from scratch. After training, ``self.loss_trend`` holds the
        per-epoch training loss (empty for solvers such as 'lbfgs' that do
        not expose ``loss_curve_``).

        :param numpy.ndarray points: the coordinates of the given (training)
            points.
        :param numpy.ndarray values: the (training) values in the points.
        :return: the fitted approximation (``self``), allowing chaining.
        """
        points = np.asarray(points)
        values = np.asarray(values)
        logger.debug(
            "Fitting ANN with points shape: %s, values shape: %s",
            points.shape, values.shape)

        self.model = MLPRegressor(
            hidden_layer_sizes=tuple(self.layers),
            activation=self.activation,
            solver=self.solver,
            alpha=self.alpha,
            learning_rate_init=self.learning_rate_init,
            max_iter=self.max_iter,
            verbose=False,
            **self.extra_kwargs,
        )

        logger.info("Starting ANN training")
        self.model.fit(points, values)

        # 'lbfgs' records no per-epoch curve; guard the attribute access so
        # loss_trend degrades to an empty list instead of raising.
        self.loss_trend = list(getattr(self.model, "loss_curve_", []))

        if self.frequency_print > 0:
            total = len(self.loss_trend)
            for epoch, loss in enumerate(self.loss_trend, start=1):
                # Report the first, the last, and every
                # frequency_print-th epoch.
                if (epoch == 1 or epoch == total
                        or epoch % self.frequency_print == 0):
                    print(f"[epoch {epoch:6d}]\t{loss:e}")

        logger.info(
            "ANN training completed after %d iterations",
            getattr(self.model, "n_iter_", 0))
        if self.loss_trend:
            logger.debug("Final loss: %f", self.loss_trend[-1])

        return self

    def predict(self, new_point):
        """
        Evaluate the ANN at given 'new_point'.

        :param numpy.ndarray new_point: the coordinates of the given points;
            a single point is promoted to a one-row 2D batch.
        :return: the predicted values via the ANN.
        :rtype: numpy.ndarray
        """
        new_point = np.atleast_2d(new_point)
        logger.debug(
            "Predicting with ANN for %d points", new_point.shape[0])
        return self.model.predict(new_point)