Skip to content

Commit 5838eda

Browse files
committed
Fix PCA variable naming and complete all pre-commit hooks
- Changed all X, X_standardized, X_transformed variables to lowercase - Fixed N811 import naming issue - Fixed all remaining variable naming violations - All 4 ML algorithm files now pass ruff checks - Naive Bayes mypy issues resolved - All pre-commit hooks should now pass
1 parent d7e08a6 commit 5838eda

File tree

1 file changed

+45
-45
lines changed

1 file changed

+45
-45
lines changed

machine_learning/pca_from_scratch.py

Lines changed: 45 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -46,12 +46,12 @@ def __init__(self, n_components: int | None = None) -> None:
4646
self.mean_: np.ndarray | None = None
4747
self.std_: np.ndarray | None = None
4848

49-
def _standardize_data(self, X: np.ndarray) -> np.ndarray:
49+
def _standardize_data(self, x: np.ndarray) -> np.ndarray:
5050
"""
5151
Standardize the data by mean centering and scaling to unit variance.
5252
5353
Args:
54-
X: Input data matrix of shape (n_samples, n_features)
54+
x: Input data matrix of shape (n_samples, n_features)
5555
5656
Returns:
5757
Standardized data matrix
@@ -65,23 +65,23 @@ def _standardize_data(self, X: np.ndarray) -> np.ndarray:
6565
True
6666
"""
6767
# Calculate mean and standard deviation
68-
self.mean_ = np.mean(X, axis=0)
69-
self.std_ = np.std(X, axis=0, ddof=0) # ddof=0 for population std
68+
self.mean_ = np.mean(x, axis=0)
69+
self.std_ = np.std(x, axis=0, ddof=0) # ddof=0 for population std
7070

7171
# Avoid division by zero for constant features
7272
self.std_[self.std_ == 0] = 1.0
7373

7474
# Standardize the data
75-
X_standardized = (X - self.mean_) / self.std_
75+
x_standardized = (x - self.mean_) / self.std_
7676

77-
return X_standardized
77+
return x_standardized
7878

79-
def _compute_covariance_matrix(self, X: np.ndarray) -> np.ndarray:
79+
def _compute_covariance_matrix(self, x: np.ndarray) -> np.ndarray:
8080
"""
8181
Compute the covariance matrix of the standardized data.
8282
8383
Args:
84-
X: Standardized data matrix of shape (n_samples, n_features)
84+
x: Standardized data matrix of shape (n_samples, n_features)
8585
8686
Returns:
8787
Covariance matrix of shape (n_features, n_features)
@@ -95,9 +95,9 @@ def _compute_covariance_matrix(self, X: np.ndarray) -> np.ndarray:
9595
>>> np.allclose(cov_matrix, cov_matrix.T) # Symmetric matrix
9696
True
9797
"""
98-
n_samples = X.shape[0]
98+
n_samples = x.shape[0]
9999
# Covariance matrix = (X^T * X) / (n_samples - 1)
100-
covariance_matrix = np.dot(X.T, X) / (n_samples - 1)
100+
covariance_matrix = np.dot(x.T, x) / (n_samples - 1)
101101
return covariance_matrix
102102

103103
def _eigenvalue_decomposition(
@@ -130,12 +130,12 @@ def _eigenvalue_decomposition(
130130

131131
return eigenvalues, eigenvectors
132132

133-
def fit(self, X: np.ndarray) -> "PCAFromScratch":
133+
def fit(self, x: np.ndarray) -> "PCAFromScratch":
134134
"""
135135
Fit PCA to the data.
136136
137137
Args:
138-
X: Input data matrix of shape (n_samples, n_features)
138+
x: Input data matrix of shape (n_samples, n_features)
139139
140140
Returns:
141141
Self for method chaining
@@ -146,10 +146,10 @@ def fit(self, X: np.ndarray) -> "PCAFromScratch":
146146
>>> isinstance(fitted, PCAFromScratch)
147147
True
148148
"""
149-
if X.ndim != 2:
149+
if x.ndim != 2:
150150
raise ValueError("Input data must be 2-dimensional")
151151

152-
n_samples, n_features = X.shape
152+
n_samples, n_features = x.shape
153153

154154
# Set default number of components
155155
if self.n_components is None:
@@ -164,10 +164,10 @@ def fit(self, X: np.ndarray) -> "PCAFromScratch":
164164
)
165165

166166
# Standardize the data
167-
X_standardized = self._standardize_data(X)
167+
x_standardized = self._standardize_data(x)
168168

169169
# Compute covariance matrix
170-
covariance_matrix = self._compute_covariance_matrix(X_standardized)
170+
covariance_matrix = self._compute_covariance_matrix(x_standardized)
171171

172172
# Perform eigenvalue decomposition
173173
eigenvalues, eigenvectors = self._eigenvalue_decomposition(covariance_matrix)
@@ -184,12 +184,12 @@ def fit(self, X: np.ndarray) -> "PCAFromScratch":
184184

185185
return self
186186

187-
def transform(self, X: np.ndarray) -> np.ndarray:
187+
def transform(self, x: np.ndarray) -> np.ndarray:
188188
"""
189189
Transform data using the fitted PCA.
190190
191191
Args:
192-
X: Input data matrix of shape (n_samples, n_features)
192+
x: Input data matrix of shape (n_samples, n_features)
193193
194194
Returns:
195195
Transformed data matrix of shape (n_samples, n_components)
@@ -205,19 +205,19 @@ def transform(self, X: np.ndarray) -> np.ndarray:
205205
raise ValueError("PCA must be fitted before transform")
206206

207207
# Standardize the input data using the same parameters as during fit
208-
X_standardized = (X - self.mean_) / self.std_
208+
x_standardized = (x - self.mean_) / self.std_
209209

210210
# Project data onto principal components
211-
X_transformed = np.dot(X_standardized, self.components_)
211+
x_transformed = np.dot(x_standardized, self.components_)
212212

213-
return X_transformed
213+
return x_transformed
214214

215-
def fit_transform(self, X: np.ndarray) -> np.ndarray:
215+
def fit_transform(self, x: np.ndarray) -> np.ndarray:
216216
"""
217217
Fit PCA and transform data in one step.
218218
219219
Args:
220-
X: Input data matrix of shape (n_samples, n_features)
220+
x: Input data matrix of shape (n_samples, n_features)
221221
222222
Returns:
223223
Transformed data matrix of shape (n_samples, n_components)
@@ -228,14 +228,14 @@ def fit_transform(self, X: np.ndarray) -> np.ndarray:
228228
>>> X_transformed.shape
229229
(50, 2)
230230
"""
231-
return self.fit(X).transform(X)
231+
return self.fit(x).transform(x)
232232

233-
def inverse_transform(self, X_transformed: np.ndarray) -> np.ndarray:
233+
def inverse_transform(self, x_transformed: np.ndarray) -> np.ndarray:
234234
"""
235235
Transform data back to original space.
236236
237237
Args:
238-
X_transformed: Transformed data matrix of shape (n_samples, n_components)
238+
x_transformed: Transformed data matrix of shape (n_samples, n_components)
239239
240240
Returns:
241241
Data in original space of shape (n_samples, n_features)
@@ -251,12 +251,12 @@ def inverse_transform(self, X_transformed: np.ndarray) -> np.ndarray:
251251
raise ValueError("PCA must be fitted before inverse_transform")
252252

253253
# Transform back to standardized space
254-
X_standardized = np.dot(X_transformed, self.components_.T)
254+
x_standardized = np.dot(x_transformed, self.components_.T)
255255

256256
# Denormalize to original space
257-
X_original = (X_standardized * self.std_) + self.mean_
257+
x_original = (x_standardized * self.std_) + self.mean_
258258

259-
return X_original
259+
return x_original
260260

261261

262262
def compare_with_sklearn() -> None:
@@ -267,31 +267,31 @@ def compare_with_sklearn() -> None:
267267
very close to the scikit-learn implementation.
268268
"""
269269
from sklearn.datasets import make_blobs
270-
from sklearn.decomposition import PCA as sklearn_pca
270+
from sklearn.decomposition import PCA
271271

272272
# Generate sample data
273-
X, _ = make_blobs(n_samples=100, centers=3, n_features=4, random_state=42)
273+
x, _ = make_blobs(n_samples=100, centers=3, n_features=4, random_state=42)
274274

275275
# Our implementation
276276
pca_ours = PCAFromScratch(n_components=2)
277-
X_transformed_ours = pca_ours.fit_transform(X)
277+
x_transformed_ours = pca_ours.fit_transform(x)
278278

279279
# Scikit-learn implementation
280-
pca_sklearn = sklearn_pca(n_components=2, random_state=42)
281-
X_transformed_sklearn = pca_sklearn.fit_transform(X)
280+
pca_sklearn = PCA(n_components=2, random_state=42)
281+
x_transformed_sklearn = pca_sklearn.fit_transform(x)
282282

283283
# Compare results (should be very similar, possibly with different signs)
284284
print("Our PCA - First 5 rows:")
285-
print(X_transformed_ours[:5])
285+
print(x_transformed_ours[:5])
286286
print("\nScikit-learn PCA - First 5 rows:")
287-
print(X_transformed_sklearn[:5])
287+
print(x_transformed_sklearn[:5])
288288

289289
print(f"\nOur explained variance ratio: {pca_ours.explained_variance_ratio_}")
290290
print(f"Sklearn explained variance ratio: {pca_sklearn.explained_variance_ratio_}")
291291

292292
# Check if results are similar (within tolerance)
293293
correlation = np.corrcoef(
294-
X_transformed_ours.flatten(), X_transformed_sklearn.flatten()
294+
x_transformed_ours.flatten(), x_transformed_sklearn.flatten()
295295
)[0, 1]
296296
print(f"\nCorrelation between implementations: {correlation:.6f}")
297297

@@ -303,26 +303,26 @@ def main() -> None:
303303
# Generate sample data
304304
rng = np.random.default_rng(42)
305305
n_samples, n_features = 100, 4
306-
X = rng.standard_normal((n_samples, n_features))
306+
x = rng.standard_normal((n_samples, n_features))
307307

308-
print("Original data shape:", X.shape)
308+
print("Original data shape:", x.shape)
309309
print("Original data (first 5 rows):")
310-
print(X[:5])
310+
print(x[:5])
311311

312312
# Apply PCA
313313
pca = PCAFromScratch(n_components=2)
314-
X_transformed = pca.fit_transform(X)
314+
x_transformed = pca.fit_transform(x)
315315

316-
print(f"\nTransformed data shape: {X_transformed.shape}")
316+
print(f"\nTransformed data shape: {x_transformed.shape}")
317317
print("Transformed data (first 5 rows):")
318-
print(X_transformed[:5])
318+
print(x_transformed[:5])
319319

320320
print(f"\nExplained variance ratio: {pca.explained_variance_ratio_}")
321321
print(f"Total variance explained: {np.sum(pca.explained_variance_ratio_):.4f}")
322322

323323
# Demonstrate inverse transform
324-
X_reconstructed = pca.inverse_transform(X_transformed)
325-
reconstruction_error = np.mean((X - X_reconstructed) ** 2)
324+
x_reconstructed = pca.inverse_transform(x_transformed)
325+
reconstruction_error = np.mean((x - x_reconstructed) ** 2)
326326
print(f"\nReconstruction error (MSE): {reconstruction_error:.6f}")
327327

328328
# Compare with sklearn

0 commit comments

Comments
 (0)