Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{
"githubPullRequests.ignoredPullRequestBranches": [
"master"
]
],
"python-envs.defaultEnvManager": "ms-python.python:system",
"python-envs.defaultPackageManager": "ms-python.python:pip"
}
2 changes: 1 addition & 1 deletion digital_image_processing/filters/local_binary_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def get_neighbors_pixel(

try:
return int(image[x_coordinate][y_coordinate] >= center)
except (IndexError, TypeError):
except (IndexError, TypeError):
return 0


Expand Down
2 changes: 1 addition & 1 deletion divide_and_conquer/convex_hull.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _construct_points(
else:
try:
points.append(Point(p[0], p[1]))
except (IndexError, TypeError):
except (IndexError, TypeError):
print(
f"Ignoring deformed point {p}. All points"
" must have at least 2 coordinates."
Expand Down
2 changes: 1 addition & 1 deletion dynamic_programming/catalan_numbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def catalan_numbers(upper_limit: int) -> "list[int]":
print(f"The Catalan numbers from 0 through {N} are:")
print(catalan_numbers(N))
print("Try another upper limit for the sequence: ", end="")
except (NameError, ValueError):
except (NameError, ValueError):
print("\n********* Invalid input, goodbye! ************\n")

import doctest
Expand Down
145 changes: 112 additions & 33 deletions machine_learning/decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,21 @@
Output: The decision tree maps a real number input to a real number output.
"""

from collections import Counter

import numpy as np


class DecisionTree:
def __init__(self, depth=5, min_leaf_size=5):
def __init__(self, depth=5, min_leaf_size=5, task="regression", criterion="gini"):
self.depth = depth
self.decision_boundary = 0
self.left = None
self.right = None
self.min_leaf_size = min_leaf_size
self.prediction = None
self.task = task
self.criterion = criterion

def mean_squared_error(self, labels, prediction):
"""
Expand All @@ -38,10 +42,61 @@ def mean_squared_error(self, labels, prediction):
True
"""
if labels.ndim != 1:
print("Error: Input labels must be one dimensional")

raise ValueError("Input labels must be one dimensional")
return np.mean((labels - prediction) ** 2)

def gini(self, y):
"""
Computes the Gini impurity for a set of labels.
Gini impurity measures how often a randomly chosen element
would be incorrectly classified.
Formula: Gini = 1 - sum(p_i^2)
where p_i is the probability of class i.

Lower Gini value indicates better purity (best split).
"""
_, counts = np.unique(y, return_counts=True)
prob = counts / counts.sum()
return 1 - np.sum(prob**2)

def entropy(self, y):
"""
Computes the entropy (impurity) of a set of labels.
Entropy measures the randomness or disorder in the data.
Formula: Entropy = - sum(p_i * log2(p_i))
where p_i is the probability of class i.

Lower entropy means higher purity.
"""
_, counts = np.unique(y, return_counts=True)
prob = counts / counts.sum()
return -np.sum(prob * np.log2(prob + 1e-9))

def information_gain(self, parent, left, right):
"""
Computes the information gain from splitting a dataset.
Information gain represents the reduction in impurity
after a dataset is split into left and right subsets.
Formula: IG = Impurity(parent) - [
weighted impurity(left) + weighted impurity(right)]

Higher information gain indicates a better split.
"""
if self.criterion == "gini":
func = self.gini
elif self.criterion == "entropy":
func = self.entropy
else:
raise ValueError("Invalid criterion")

weight_l = len(left) / len(parent)
weight_r = len(right) / len(parent)

return func(parent) - (weight_l * func(left) + weight_r * func(right))

def most_common_label(self, y):
return Counter(y).most_common(1)[0][0]

def train(self, x, y):
"""
train:
Expand Down Expand Up @@ -87,35 +142,47 @@ def train(self, x, y):
if y.ndim != 1:
raise ValueError("Data set labels must be one-dimensional")

if len(x) < 2 * self.min_leaf_size:
self.prediction = np.mean(y)
return

if self.depth == 1:
self.prediction = np.mean(y)
if len(x) < 2 * self.min_leaf_size or self.depth == 1:
if self.task == "regression":
self.prediction = np.mean(y)
else:
self.prediction = self.most_common_label(y)
return

best_split = 0
min_error = self.mean_squared_error(x, np.mean(y)) * 2

"""
loop over all possible splits for the decision tree. find the best split.
if no split exists that is less than 2 * error for the entire array
then the data set is not split and the average for the entire array is used as
the predictor
"""
best_score = float("inf") if self.task == "regression" else -float("inf")

for i in range(len(x)):
if len(x[:i]) < self.min_leaf_size: # noqa: SIM114
if len(x[:i]) < self.min_leaf_size:
continue
elif len(x[i:]) < self.min_leaf_size:
if len(x[i:]) < self.min_leaf_size:
continue

left_y = y[:i]
right_y = y[i:]

if self.task == "regression":
error_left = self.mean_squared_error(left_y, np.mean(left_y))
error_right = self.mean_squared_error(right_y, np.mean(right_y))
score = error_left + error_right

if score < best_score:
best_score = score
best_split = i

else:
error_left = self.mean_squared_error(x[:i], np.mean(y[:i]))
error_right = self.mean_squared_error(x[i:], np.mean(y[i:]))
error = error_left + error_right
if error < min_error:
gain = self.information_gain(y, left_y, right_y)

if gain > best_score:
best_score = gain
best_split = i
min_error = error

if best_split != 0:
left_x = x[:best_split]
Expand All @@ -124,18 +191,27 @@ def train(self, x, y):
right_y = y[best_split:]

self.decision_boundary = x[best_split]

self.left = DecisionTree(
depth=self.depth - 1, min_leaf_size=self.min_leaf_size
depth=self.depth - 1,
min_leaf_size=self.min_leaf_size,
task=self.task,
criterion=self.criterion,
)
self.right = DecisionTree(
depth=self.depth - 1, min_leaf_size=self.min_leaf_size
depth=self.depth - 1,
min_leaf_size=self.min_leaf_size,
task=self.task,
criterion=self.criterion,
)

self.left.train(left_x, left_y)
self.right.train(right_x, right_y)
else:
self.prediction = np.mean(y)

return
elif self.task == "regression":
self.prediction = np.mean(y)
else:
self.prediction = self.most_common_label(y)

def predict(self, x):
"""
Expand All @@ -146,13 +222,13 @@ def predict(self, x):
"""
if self.prediction is not None:
return self.prediction
elif self.left is not None and self.right is not None:
if self.left is not None and self.right is not None:
if x >= self.decision_boundary:
return self.right.predict(x)
else:
return self.left.predict(x)
else:
raise ValueError("Decision tree not yet trained")

raise ValueError("Decision tree not yet trained")


class TestDecisionTree:
Expand Down Expand Up @@ -183,17 +259,20 @@ def main():
x = np.arange(-1.0, 1.0, 0.005)
y = np.sin(x)

tree = DecisionTree(depth=10, min_leaf_size=10)
tree = DecisionTree(depth=10, min_leaf_size=10, task="regression")
tree.train(x, y)

rng = np.random.default_rng()
test_cases = (rng.random(10) * 2) - 1
predictions = np.array([tree.predict(x) for x in test_cases])
avg_error = np.mean((predictions - test_cases) ** 2)
print("Regression prediction:", tree.predict(0.5))
x_cls = np.array([1, 2, 3, 4, 5, 6])
y_cls = np.array([0, 0, 0, 1, 1, 1])

clf = DecisionTree(
depth=3, min_leaf_size=1, task="classification", criterion="gini"
)
clf.train(x_cls, y_cls)

print("Test values: " + str(test_cases))
print("Predictions: " + str(predictions))
print("Average error: " + str(avg_error))
print("Classification prediction (2):", clf.predict(2))
print("Classification prediction (5):", clf.predict(5))


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion maths/greatest_common_divisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def main():
f"{greatest_common_divisor(num_1, num_2)}"
)
print(f"By iterative gcd({num_1}, {num_2}) = {gcd_by_iterative(num_1, num_2)}")
except (IndexError, UnboundLocalError, ValueError):
except (IndexError, UnboundLocalError, ValueError):
print("Wrong input")


Expand Down
2 changes: 1 addition & 1 deletion project_euler/problem_002/sol4.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def solution(n: int = 4000000) -> int:

try:
n = int(n)
except (TypeError, ValueError):
except (TypeError, ValueError):
raise TypeError("Parameter n must be int or castable to int.")
if n <= 0:
raise ValueError("Parameter n must be greater than or equal to one.")
Expand Down
2 changes: 1 addition & 1 deletion project_euler/problem_003/sol1.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def solution(n: int = 600851475143) -> int:

try:
n = int(n)
except (TypeError, ValueError):
except (TypeError, ValueError):
raise TypeError("Parameter n must be int or castable to int.")
if n <= 0:
raise ValueError("Parameter n must be greater than or equal to one.")
Expand Down
2 changes: 1 addition & 1 deletion project_euler/problem_003/sol2.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def solution(n: int = 600851475143) -> int:

try:
n = int(n)
except (TypeError, ValueError):
except (TypeError, ValueError):
raise TypeError("Parameter n must be int or castable to int.")
if n <= 0:
raise ValueError("Parameter n must be greater than or equal to one.")
Expand Down
2 changes: 1 addition & 1 deletion project_euler/problem_003/sol3.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def solution(n: int = 600851475143) -> int:

try:
n = int(n)
except (TypeError, ValueError):
except (TypeError, ValueError):
raise TypeError("Parameter n must be int or castable to int.")
if n <= 0:
raise ValueError("Parameter n must be greater than or equal to one.")
Expand Down
2 changes: 1 addition & 1 deletion project_euler/problem_005/sol1.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def solution(n: int = 20) -> int:

try:
n = int(n)
except (TypeError, ValueError):
except (TypeError, ValueError):
raise TypeError("Parameter n must be int or castable to int.")
if n <= 0:
raise ValueError("Parameter n must be greater than or equal to one.")
Expand Down
2 changes: 1 addition & 1 deletion project_euler/problem_007/sol2.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def solution(nth: int = 10001) -> int:

try:
nth = int(nth)
except (TypeError, ValueError):
except (TypeError, ValueError):
raise TypeError("Parameter nth must be int or castable to int.") from None
if nth <= 0:
raise ValueError("Parameter nth must be greater than or equal to one.")
Expand Down
2 changes: 1 addition & 1 deletion web_programming/fetch_well_rx_price.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def fetch_pharmacy_and_price_list(drug_name: str, zip_code: str) -> list | None:

return pharmacy_price_list

except (httpx.HTTPError, ValueError):
except (httpx.HTTPError, ValueError):
return None


Expand Down
2 changes: 1 addition & 1 deletion web_programming/instagram_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def get_json(self) -> dict:
scripts = BeautifulSoup(html, "html.parser").find_all("script")
try:
return extract_user_profile(scripts[4])
except (json.decoder.JSONDecodeError, KeyError):
except (json.decoder.JSONDecodeError, KeyError):
return extract_user_profile(scripts[3])

def __repr__(self) -> str:
Expand Down
Loading