from rbms.custom_fn import log2cosh
77
88
9- @torch .jit .script
109def _sample_hiddens (
1110 v : Tensor , weight_matrix : Tensor , hbias : Tensor , beta : float = 1.0
1211) -> Tuple [Tensor , Tensor ]:
1312 mh = hbias + (v @ weight_matrix )
14- h = torch .randn_like (mh ) / torch .sqrt (weight_matrix .shape [0 ]) + mh
13+ h = (
14+ torch .randn_like (mh ) / torch .sqrt (torch .ones_like (mh ) * weight_matrix .shape [0 ])
15+ + mh
16+ )
1517 return h , mh
1618
1719
18- @torch .jit .script
1920def _sample_visibles (
2021 h : Tensor , weight_matrix : Tensor , vbias : Tensor , beta : float = 1.0
2122) -> Tuple [Tensor , Tensor ]:
@@ -25,7 +26,6 @@ def _sample_visibles(
2526 return v , mv
2627
2728
28- @torch .jit .script
2929def _compute_energy (
3030 v : Tensor ,
3131 h : Tensor ,
@@ -43,7 +43,6 @@ def _compute_energy(
4343 return - fields - interaction + quad
4444
4545
46- @torch .jit .script
4746def _compute_energy_visibles (
4847 v : Tensor , vbias : Tensor , hbias : Tensor , weight_matrix : Tensor , const : Tensor
4948) -> Tensor :
@@ -53,7 +52,6 @@ def _compute_energy_visibles(
5352 return - field - quad_term + const
5453
5554
56- @torch .jit .script
5755def _compute_energy_hiddens (
5856 h : Tensor , vbias : Tensor , hbias : Tensor , weight_matrix : Tensor
5957) -> Tensor :
@@ -65,7 +63,6 @@ def _compute_energy_hiddens(
6563 return - field - log_term .sum (1 ) + quad
6664
6765
68- @torch .jit .script
6966def _compute_gradient (
7067 v_data : Tensor ,
7168 mh_data : Tensor ,
@@ -121,12 +118,11 @@ def _compute_gradient(
121118 hbias .shape [0 ], device = hbias .device , dtype = hbias .dtype
122119 ) # No training on biases
123120
124- weight_matrix .grad . set_ ( grad_weight_matrix )
125- vbias .grad . set_ ( grad_vbias )
126- hbias .grad . set_ ( grad_hbias )
121+ weight_matrix .grad = grad_weight_matrix
122+ vbias .grad = grad_vbias
123+ hbias .grad = grad_hbias
127124
128125
129- @torch .jit .script
130126def _init_chains (
131127 num_samples : int ,
132128 weight_matrix : Tensor ,
0 commit comments