Commit 3a4b87e

Logging and Torch drop (#276)

* Add partial support to logging
* Remove torch support, move to sklearn for MLP

1 parent 61f2773 commit 3a4b87e

26 files changed

Lines changed: 681 additions & 595 deletions
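
The logging half of the change attaches a module-level `logging.getLogger(__name__)` to the rewritten files (visible in the `ann.py` diff below) and emits `debug`/`info` records instead of unconditional prints. A minimal sketch of how a caller could surface those records, using only the standard library; the logger name `ezyrb.approximation.ann` is an assumption derived from the module path shown in this diff:

```python
import logging

# Show INFO and above from every logger, with a simple format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)

# Or raise verbosity only for the module touched by this commit
# (name assumed to follow the usual __name__ convention).
logging.getLogger("ezyrb.approximation.ann").setLevel(logging.DEBUG)
```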

README.md

Lines changed: 1 addition & 1 deletion
@@ -103,7 +103,7 @@ class Approximation{
 See the [**Examples**](#examples) section below and the [**Tutorials**](tutorials/README.md) to have an idea of the potential of this package.
 
 ## Dependencies and installation
-**EZyRB** requires `numpy`, `scipy`, `sklearn`, `matplotlib`, `torch`,
+**EZyRB** requires `numpy`, `scipy`, `sklearn`, `matplotlib`,
 `pytest` (for local test) and `sphinx` (to generate the documentation). The code
 has been tested with Python3.5 version, but it should be compatible with
 Python3. It can be installed using `pip` or directly from the source code.
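
In user code, the dropped `torch` requirement surfaces as a changed `ANN` constructor. The before/after below is a sketch assembled from the docstring examples updated in `ann.py` later in this commit, not an additional API:

```python
import numpy as np
import ezyrb

x = np.random.uniform(-1, 1, size=(4, 2))
y = np.array([np.sin(x[:, 0]), np.cos(x[:, 1]**3)]).T

# Before this commit (torch-based API):
#   import torch.nn as nn
#   ann = ezyrb.ANN([10, 5], nn.Tanh(), [20000, 1e-5])

# After this commit (sklearn-backed API):
ann = ezyrb.ANN([10, 5], activation='tanh', max_iter=20000)
ann.fit(x, y)
y_pred = ann.predict(x)
```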

ezyrb/approximation/ann.py

Lines changed: 111 additions & 183 deletions
@@ -2,220 +2,150 @@
 Module for Artificial Neural Network (ANN) Prediction.
 """
 
-import torch
-import torch.nn as nn
+import logging
 import numpy as np
+from sklearn.neural_network import MLPRegressor
 from .approximation import Approximation
 
+logger = logging.getLogger(__name__)
+
 
 class ANN(Approximation):
     """
-    Feed-Forward Artifical Neural Network (ANN).
+    Feed-Forward Artifical Neural Network (ANN) using sklearn's MLPRegressor.
 
     :param list layers: ordered list with the number of neurons of each hidden
         layer.
-    :param torch.nn.modules.activation function: activation function at each
-        layer. A single activation function can be passed or a list of them of
-        length equal to the number of hidden layers.
-    :param list stop_training: list with the maximum number of training
-        iterations (int) and/or the desired tolerance on the training loss
-        (float).
-    :param torch.nn.Module loss: loss definition (Mean Squared if not given).
-    :param torch.optim optimizer: the torch class implementing optimizer.
-        Default value is `Adam` optimizer.
-    :param float lr: the learning rate. Default is 0.001.
-    :param float l2_regularization: the L2 regularization coefficient, it
-        corresponds to the "weight_decay". Default is 0 (no regularization).
-    :param int frequency_print: the frequency in terms of epochs of the print
-        during the training of the network.
-    :param boolean last_identity: Flag to specify if the last activation
-        function is the identity function. In the case the user provides the
-        entire list of activation functions, this attribute is ignored. Default
-        value is True.
+    :param str activation: activation function for the hidden layers.
+        Options: 'identity', 'logistic', 'tanh', 'relu' (default).
+    :param str solver: the solver for weight optimization. Options: 'lbfgs',
+        'sgd', 'adam' (default).
+    :param int max_iter: maximum number of iterations. Default is 200.
+    :param float tol: tolerance for the optimization. Default is 1e-4.
+    :param float learning_rate_init: initial learning rate (only for 'sgd'
+        or 'adam'). Default is 0.001.
+    :param float alpha: L2 penalty (regularization term) parameter.
+        Default is 0.0001.
+    :param int frequency_print: the frequency in terms of epochs to print
+        training progress. Default is 10.
+    :param int random_state: random state for reproducibility. Default is None.
+    :param bool early_stopping: whether to use early stopping to terminate
+        training when validation score is not improving. Default is False.
+    :param float validation_fraction: proportion of training data to set aside
+        as validation set for early stopping. Default is 0.1.
 
     :Example:
     >>> import ezyrb
    >>> import numpy as np
-    >>> import torch.nn as nn
-    >>> x = np.random.uniform(-1, 1, size =(4, 2))
+    >>> x = np.random.uniform(-1, 1, size=(4, 2))
     >>> y = np.array([np.sin(x[:, 0]), np.cos(x[:, 1]**3)]).T
-    >>> ann = ezyrb.ANN([10, 5], nn.Tanh(), [20000,1e-5])
+    >>> ann = ezyrb.ANN([10, 5], activation='tanh', max_iter=20000)
     >>> ann.fit(x, y)
     >>> y_pred = ann.predict(x)
     >>> print(y)
     >>> print(y_pred)
     >>> print(len(ann.loss_trend))
     >>> print(ann.loss_trend[-1])
     """
-    def __init__(self, layers, function, stop_training, loss=None,
-                 optimizer=torch.optim.Adam, lr=0.001, l2_regularization=0,
-                 frequency_print=10, last_identity=True):
-        """
-        Initialize an Artificial Neural Network.
-
-        :param list layers: Ordered list with the number of neurons of each hidden layer.
-        :param function: Activation function(s) for each layer.
-        :param stop_training: Stopping criteria for training (iterations and/or tolerance).
-        :param loss: Loss function to use. Default is MSELoss.
-        :param optimizer: Optimizer class to use. Default is Adam.
-        :param float lr: Learning rate. Default is 0.001.
-        :param float l2_regularization: L2 regularization coefficient. Default is 0.
-        :param int frequency_print: Frequency of printing during training. Default is 10.
-        :param bool last_identity: Whether the last activation is identity. Default is True.
-        """
-        if loss is None:
-            loss = torch.nn.MSELoss()
-
-        if not isinstance(function, list): # Single activation function passed
-            nl = len(layers) if last_identity else len(layers)+1
-            function = [function] * nl
-
-        if not isinstance(stop_training, list):
-            stop_training = [stop_training]
-
-        if torch.cuda.is_available(): # Check if GPU is available
-            print("Using cuda device")
-            torch.cuda.empty_cache()
-            self.use_cuda = True
-        else:
-            self.use_cuda = False
+    def __init__(
+        self,
+        layers,
+        activation="tanh",
+        max_iter=200,
+        solver="adam",
+        learning_rate_init=0.001,
+        alpha=0.0001,
+        frequency_print=10,
+        **kwargs,
+    ):
+        logger.debug(
+            "Initializing ANN with layers=%s, activation=%s, "
+            "solver=%s, max_iter=%d, lr=%f, alpha=%f",
+            layers,
+            activation,
+            solver,
+            max_iter,
+            learning_rate_init,
+            alpha,
+        )
 
         self.layers = layers
-        self.function = function
-        self.loss = loss
-        self.stop_training = stop_training
-
-        self.loss_trend = []
-        self.model = None
-        self.optimizer = optimizer
-
+        self.activation = activation
+        self.solver = solver
+        self.max_iter = max_iter
+        self.learning_rate_init = learning_rate_init
+        self.alpha = alpha
         self.frequency_print = frequency_print
-        self.lr = lr
-        self.l2_regularization = l2_regularization
-
-    def _convert_numpy_to_torch(self, array):
-        """
-        Converting data type.
-
-        :param numpy.ndarray array: input array.
-        :return: the tensorial counter-part of the input array.
-        :rtype: torch.Tensor.
-        """
-        return torch.from_numpy(array).float()
-
-    def _convert_torch_to_numpy(self, tensor):
-        """
-        Converting data type.
-
-        :param torch.Tensor tensor: input tensor.
-        :return: the vectorial counter-part of the input tensor.
-        :rtype: numpy.ndarray.
-        """
-        return tensor.detach().numpy()
-
-    @staticmethod
-    def _list_to_sequential(layers, functions):
-
-        layers_torch = []
-        inout_layers = [[layers[i], layers[i+1]] for i in range(len(layers)-1)]
+        self.extra_kwargs = kwargs
 
-        while True:
-            if inout_layers:
-                inp_d, out_d = inout_layers.pop(0)
-                layers_torch.append(nn.Linear(inp_d, out_d))
-
-            if functions:
-                layers_torch.append(functions.pop(0))
-
-            if not functions and not inout_layers:
-                break
-
-        return nn.Sequential(*layers_torch)
-
-    def _build_model(self, points, values):
-        """
-        Build the torch neural network model.
-
-        Constructs a feed-forward neural network with the specified layers
-        and activation functions.
-
-        :param numpy.ndarray points: The coordinates of the training points.
-        :param numpy.ndarray values: The training values at the points.
-        """
-        layers = self.layers.copy()
-        layers.insert(0, points.shape[1])
-        layers.append(values.shape[1])
+        self.model = None
+        self.loss_trend = []
 
-        if self.model is None:
-            self.model = self._list_to_sequential(layers, self.function)
-        else:
-            self.model = self.model
+        logger.info("ANN initialized with sklearn MLPRegressor")
 
     def fit(self, points, values):
         """
         Build the ANN given 'points' and 'values' and perform training.
 
-        Training procedure information:
-        - optimizer: Adam's method with default parameters (see, e.g.,
-          https://pytorch.org/docs/stable/optim.html);
-        - loss: self.loss (if none, the Mean Squared Loss is set by
-          default).
-        - stopping criterion: the fulfillment of the requested tolerance
-          on the training loss compatibly with the prescribed budget of
-          training iterations (if type(self.stop_training) is list); if
-          type(self.stop_training) is int or type(self.stop_training) is
-          float, only the number of maximum iterations or the accuracy
-          level on the training loss is considered as the stopping rule,
-          respectively.
-
         :param numpy.ndarray points: the coordinates of the given (training)
             points.
         :param numpy.ndarray values: the (training) values in the points.
         """
+        logger.debug(
+            "Fitting ANN with points shape: %s, values shape: %s",
+            points.shape,
+            values.shape,
+        )
+
+        # Create the MLPRegressor model
+        self.model = MLPRegressor(
+            hidden_layer_sizes=tuple(self.layers),
+            activation=self.activation,
+            solver=self.solver,
+            alpha=self.alpha,
+            learning_rate_init=self.learning_rate_init,
+            max_iter=self.max_iter,
+            verbose=False,
+            **self.extra_kwargs,
+        )
+
+        # Custom training loop to track loss and print progress
+        self.loss_trend = []
 
-        self._build_model(points, values)
-
-        if self.use_cuda:
-            self.model = self.model.cuda()
-            points = self._convert_numpy_to_torch(points).cuda()
-            values = self._convert_numpy_to_torch(values).cuda()
+        # For sklearn, we need to do partial fitting to track loss
+        # We'll use the standard fit but access loss_curve_ afterwards
+        logger.info("Starting ANN training")
+
+        if self.frequency_print > 0:
+            # Monkey patch to capture loss during training
+            original_fit = self.model.fit
+
+            def fit_with_logging(X, y):
+                result = original_fit(X, y)
+                if hasattr(self.model, "loss_curve_"):
+                    self.loss_trend = list(self.model.loss_curve_)
+                    for i, loss in enumerate(self.loss_trend):
+                        if (
+                            i == 0
+                            or i == len(self.loss_trend) - 1
+                            or (i + 1) % self.frequency_print == 0
+                        ):
+                            print(f"[epoch {i+1:6d}]\t{loss:e}")
+                return result
+
+            fit_with_logging(points, values)
         else:
-            points = self._convert_numpy_to_torch(points)
-            values = self._convert_numpy_to_torch(values)
-
-        optimizer = self.optimizer(
-            self.model.parameters(),
-            lr=self.lr, weight_decay=self.l2_regularization)
-
-        n_epoch = 1
-        flag = True
-        while flag:
-            y_pred = self.model(points)
-
-            loss = self.loss(y_pred, values)
+            self.model.fit(points, values)
+            if hasattr(self.model, "loss_curve_"):
+                self.loss_trend = list(self.model.loss_curve_)
 
-            optimizer.zero_grad()
-            loss.backward()
-            optimizer.step()
+        logger.info(
+            "ANN training completed after %d iterations", self.model.n_iter_
+        )
+        if self.loss_trend:
+            logger.debug("Final loss: %f", self.loss_trend[-1])
 
-            scalar_loss = loss.item()
-            self.loss_trend.append(scalar_loss)
-
-            for criteria in self.stop_training:
-                if isinstance(criteria, int): # stop criteria is an integer
-                    if n_epoch == criteria:
-                        flag = False
-                elif isinstance(criteria, float): # stop criteria is float
-                    if scalar_loss < criteria:
-                        flag = False
-
-            if (flag is False or
-                    n_epoch == 1 or n_epoch % self.frequency_print == 0):
-                print(f'[epoch {n_epoch:6d}]\t{scalar_loss:e}')
-
-            n_epoch += 1
-
-        return optimizer
+        return self
 
     def predict(self, new_point):
         """
@@ -225,12 +155,10 @@ def predict(self, new_point):
         :return: the predicted values via the ANN.
         :rtype: numpy.ndarray
         """
-        if self.use_cuda :
-            new_point = self._convert_numpy_to_torch(new_point).cuda()
-            new_point = self._convert_numpy_to_torch(
-                np.array(new_point.cpu())).cuda()
-            y_new = self._convert_torch_to_numpy(self.model(new_point).cpu())
-        else:
-            new_point = self._convert_numpy_to_torch(np.array(new_point))
-            y_new = self._convert_torch_to_numpy(self.model(new_point))
+        logger.debug(
+            "Predicting with ANN for %d points",
+            np.atleast_2d(new_point).shape[0],
+        )
+        new_point = np.atleast_2d(new_point)
+        y_new = self.model.predict(new_point)
         return y_new
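
Because the rewritten `predict` promotes its input with `np.atleast_2d`, a single parameter point can be passed as a flat array. A small sketch under the same assumptions as above:

```python
import numpy as np
import ezyrb

ann = ezyrb.ANN([10, 5], activation='tanh', max_iter=5000)
ann.fit(np.random.uniform(-1, 1, size=(50, 2)),
        np.random.uniform(-1, 1, size=(50, 2)))

# A flat point of shape (2,) is promoted to (1, 2) before being handed
# to MLPRegressor.predict, so no manual reshape is needed.
print(ann.predict(np.array([0.5, -0.25])))
```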
