ModusMorris · honnigmelone · Feb 26, 2025 · Feb 26, 2025 · Feb 26, 2025 · Feb 26, 2025
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -30,12 +30,12 @@ jobs:
         run: |
           choco install visualstudio2019buildtools --package-parameters "--add Microsoft.VisualStudio.Workload.VCTools"
           choco install cmake
-          pip install -e .  # Install the project in editable mode
+          pip install -r requirements.txt  # Install the project's dependencies
           pip install matplotlib --prefer-binary
 
       - name: Install dependencies on other OSes
         if: matrix.os != 'windows-latest'
-        run: pip install -e .
+        run: pip install -r requirements.txt
 
       - name: Lint with ruff
         run: |

diff --git a/cnn/data_generator.py b/cnn/data_generator.py
@@ -7,31 +7,46 @@
 
 
 def compute_enmo(data):
-    # Calculate the ENMO value for the data
+    """
+    Compute the ENMO value for accelerometer data.
+
+    Parameters:
+        data (DataFrame): Data with 'X', 'Y', and 'Z' columns.
+
+    Returns:
+        Series: ENMO values with negatives set to 0.
+    """
     norm = np.sqrt(data["X"] ** 2 + data["Y"] ** 2 + data["Z"] ** 2) - 1
-    return np.maximum(norm, 0)  # Set negative values to 0
+    return np.maximum(norm, 0)
 
 
 class StepCounterDataset(Dataset):
+    """
+    Dataset for step counting using left and right accelerometer data.
+    """
     def __init__(self, left_data, right_data, step_counts, window_size):
-        self.window_size = window_size  # Ensure window_size is assigned
+        """
+        Initialize the dataset by computing ENMO, differences, and step labels.
+
+        Parameters:
+            left_data (DataFrame): Left foot accelerometer data.
+            right_data (DataFrame): Right foot accelerometer data.
+            step_counts (DataFrame): Step data with 'Joint' and 'Peaks' columns.
+            window_size (int): Size of the data window.
+        """
+        self.window_size = window_size
 
-        # Calculate ENMO for both feet
         left_data["ENMO"] = compute_enmo(left_data)
         right_data["ENMO"] = compute_enmo(right_data)
 
-        # Calculate the difference in ENMO values
         left_data["ENMO_DIFF"] = left_data["ENMO"].diff().fillna(0)
         right_data["ENMO_DIFF"] = right_data["ENMO"].diff().fillna(0)
 
-        # Stack the ENMO differences for both feet
         self.data = np.hstack((left_data[["ENMO_DIFF"]], right_data[["ENMO_DIFF"]]))
 
-        # Normalize the data
         self.scaler = StandardScaler()
         self.data = self.scaler.fit_transform(self.data)
 
-        # Extract step labels
         def extract_peaks(peaks_str):
             if isinstance(peaks_str, str):
                 try:
@@ -40,76 +55,77 @@ def extract_peaks(peaks_str):
                     return []
             return []
 
-        # Extract peaks for left and right feet
         left_peaks = extract_peaks(step_counts.loc[step_counts["Joint"] == "left_foot_index", "Peaks"].values[0])
         right_peaks = extract_peaks(step_counts.loc[step_counts["Joint"] == "right_foot_index", "Peaks"].values[0])
 
-        # Create step labels
         self.step_labels = np.zeros(len(self.data), dtype=np.float32)
-
-        # Shift step labels to improve peak positions for CNN
         for p in left_peaks + right_peaks:
             if 0 <= p < len(self.step_labels) - (window_size // 2):
                 self.step_labels[p + (window_size // 2)] = 1
 
-        # Debugging information
         print("\n==== Debugging Step Extraction ====")
         print("Step data (first few rows):")
         print(step_counts.head())
-
         print("\nExtraction of peaks for the left foot:")
-        print("Raw data from CSV:", step_counts.loc[step_counts["Joint"] == "left_foot_index", "Peaks"].values)
+        print("Raw data:", step_counts.loc[step_counts["Joint"] == "left_foot_index", "Peaks"].values)
         print("Extracted peaks:", left_peaks)
-
         print("\nExtraction of peaks for the right foot:")
-        print("Raw data from CSV:", step_counts.loc[step_counts["Joint"] == "right_foot_index", "Peaks"].values)
+        print("Raw data:", step_counts.loc[step_counts["Joint"] == "right_foot_index", "Peaks"].values)
         print("Extracted peaks:", right_peaks)
-
         print("\nTotal peaks found: Left =", len(left_peaks), ", Right =", len(right_peaks))
         print("==================================\n")
 
     def __len__(self):
+        """
+        Return the number of data segments.
+        """
         return len(self.data) - self.window_size
 
     def __getitem__(self, idx):
+        """
+        Get a data segment with added Gaussian noise and its unmodified step labels.
+
+        Parameters:
+            idx (int): Starting index of the segment.
+
+        Returns:
+            tuple: (augmented data segment, corresponding labels).
+        """
         x = self.data[idx : idx + self.window_size]
         y = self.step_labels[idx : idx + self.window_size]
-
-        # Data augmentation: Add slight noise to the data
         noise = np.random.normal(0, 0.02, x.shape)
         x_augmented = x + noise
-
         return x_augmented, y
 
 
 def load_datasets(folder_path, window_size, batch_size):
     """
-    Reads the following files:
-    (Folder name)_left_acceleration_data.csv,
-    (Folder name)_right_acceleration_data.csv,
-    scaled_step_counts.csv
-    and creates a DataLoader with segments.
+    Load accelerometer and step count CSV files from a folder and create a DataLoader.
+
+    Parameters:
+        folder_path (str): Path to the folder containing the CSV files.
+        window_size (int): Size of each data window.
+        batch_size (int): Batch size for the DataLoader.
+
+    Returns:
+        DataLoader: DataLoader for the dataset, or None if files are missing/empty.
     """
     folder_name = os.path.basename(folder_path)
     left_file = os.path.join(folder_path, f"{folder_name}_left_acceleration_data.csv")
     right_file = os.path.join(folder_path, f"{folder_name}_right_acceleration_data.csv")
     step_file = os.path.join(folder_path, "scaled_step_counts.csv")
 
-    # Check if all required files exist
     if not (os.path.exists(left_file) and os.path.exists(right_file) and os.path.exists(step_file)):
         print(f"Folder {folder_name}: Missing files, skipping.")
         return None
 
-    # Load data from CSV files
     left_data = pd.read_csv(left_file)
     right_data = pd.read_csv(right_file)
     step_counts = pd.read_csv(step_file)
 
-    # Check if any of the dataframes are empty
     if left_data.empty or right_data.empty or step_counts.empty:
         print(f"Folder {folder_name}: Empty data, skipping.")
         return None
 
-    # Create dataset and DataLoader
     dataset = StepCounterDataset(left_data, right_data, step_counts, window_size)
     return DataLoader(dataset, batch_size=batch_size, shuffle=True)
diff --git a/cnn/prediction.py b/cnn/prediction.py
@@ -9,7 +9,17 @@
 
 
 def load_model(model_path, device, window_size=64):
-    """Loads the trained model."""
+    """
+    Load a pre-trained StepCounterCNN model.
+
+    Parameters:
+        model_path (str): Path to the saved model weights.
+        device (torch.device): Device to load the model onto.
+        window_size (int, optional): Input window size for the model. Defaults to 64.
+
+    Returns:
+        StepCounterCNN: The loaded and evaluated model.
+    """
     model = StepCounterCNN(window_size)
     model.load_state_dict(torch.load(model_path, map_location=device))
     model.to(device)
@@ -18,21 +28,58 @@ def load_model(model_path, device, window_size=64):
 
 
 def compute_enmo(data):
-    """Computes the Euclidean Norm Minus One (ENMO) from accelerometer data."""
+    """
+    Compute the Euclidean Norm Minus One (ENMO) for accelerometer data.
+
+    This function calculates the Euclidean norm of the 'X', 'Y', and 'Z' columns,
+    subtracts 1 from the result, and returns the element-wise maximum of that value and 0.
+
+    Parameters:
+        data (DataFrame): Accelerometer data with 'X', 'Y', and 'Z' columns.
+
+    Returns:
+        Series: ENMO values, with negatives clipped to 0.
+    """
     norm = np.sqrt(data["X"] ** 2 + data["Y"] ** 2 + data["Z"] ** 2) - 1
     return np.maximum(norm, 0)
 
 
 def process_data(left_csv, right_csv):
-    """Loads and processes acceleration data from left and right foot CSV files."""
+    """
+    Load and process accelerometer data from left and right CSV files.
+
+    Reads the CSV files for both left and right foot data, computes the ENMO for each,
+    and returns a DataFrame combining the results.
+
+    Parameters:
+        left_csv (str): Path to the left foot accelerometer CSV.
+        right_csv (str): Path to the right foot accelerometer CSV.
+
+    Returns:
+        DataFrame: A DataFrame with columns 'ENMO_left' and 'ENMO_right'.
+    """
     left_df = pd.read_csv(left_csv)
     right_df = pd.read_csv(right_csv)
 
     return pd.DataFrame({"ENMO_left": compute_enmo(left_df), "ENMO_right": compute_enmo(right_df)})
 
 
 def detect_steps(model, device, data, window_size=64):
-    """Runs the step detection model on the given data."""
+    """
+    Run the step detection model on processed accelerometer data.
+
+    The function scales the data using a StandardScaler, applies the model over sliding windows,
+    aggregates the output probabilities, and identifies step peaks using a threshold.
+
+    Parameters:
+        model (StepCounterCNN): The loaded step detection model.
+        device (torch.device): Device for computation.
+        data (DataFrame): Processed accelerometer data.
+        window_size (int, optional): Size of the sliding window. Defaults to 64.
+
+    Returns:
+        ndarray: Indices of detected step peaks.
+    """
     data = torch.tensor(StandardScaler().fit_transform(data), dtype=torch.float32, device=device)
     frame_probs = np.zeros(len(data), dtype=np.float32)
     overlap_cnt = np.zeros(len(data), dtype=np.float32)
@@ -48,7 +95,18 @@ def detect_steps(model, device, data, window_size=64):
 
 
 def parse_groundtruth_steps(groundtruth_csv):
-    """Parses the ground truth step data from CSV."""
+    """
+    Parse ground truth step data from a CSV file.
+
+    Reads the ground truth CSV, extracts the 'Peaks' column from the first two rows,
+    evaluates the string representations, and returns a set of ground truth step indices.
+
+    Parameters:
+        groundtruth_csv (str): Path to the ground truth CSV file.
+
+    Returns:
+        set: A set containing ground truth step indices.
+    """
     groundtruth_df = pd.read_csv(groundtruth_csv, nrows=2)  # Only consider the first two rows
     steps = set()
     for peak_str in groundtruth_df["Peaks"].dropna():
@@ -60,7 +118,17 @@ def parse_groundtruth_steps(groundtruth_csv):
 
 
 def plot_results(data, detected_steps, groundtruth_steps):
-    """Generates an interactive Plotly visualization of acceleration data, detected steps, and ground truth."""
+    """
+    Create an interactive Plotly visualization of acceleration data and step detections.
+
+    Plots the acceleration signals for each channel, overlays markers for detected steps and
+    ground truth steps, and displays the interactive figure.
+
+    Parameters:
+        data (DataFrame): Combined accelerometer data (e.g., 'ENMO_left' and 'ENMO_right').
+        detected_steps (ndarray): Indices of steps detected by the model.
+        groundtruth_steps (set): Set of ground truth step indices.
+    """
     fig = go.Figure()
     time_axis = np.arange(len(data))
 
@@ -102,7 +170,18 @@ def plot_results(data, detected_steps, groundtruth_steps):
 
 
 def main(model_path, left_csv, right_csv, groundtruth_csv):
-    """Runs the full step detection pipeline and visualization."""
+    """
+    Execute the full step detection pipeline and visualization.
+
+    Loads the trained model, processes accelerometer data from left and right CSV files,
+    runs the step detection, parses ground truth step data, and visualizes the results.
+
+    Parameters:
+        model_path (str): Path to the saved model weights.
+        left_csv (str): Path to the left foot accelerometer CSV.
+        right_csv (str): Path to the right foot accelerometer CSV.
+        groundtruth_csv (str): Path to the ground truth CSV file.
+    """
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model = load_model(model_path, device)
     data = process_data(left_csv, right_csv)
@@ -119,3 +198,4 @@ def main(model_path, left_csv, right_csv, groundtruth_csv):
     groundtruth_csv = "D:/Daisy/5. Semester/SmartHealth/Step-counter/Output/processed_sliced_and_scaled data/test/005/scaled_step_counts.csv"
 
     main(model_path, left_csv, right_csv, groundtruth_csv)
+
diff --git a/requirements.txt b/requirements.txt