Make weight vector size configurable: pass `dim` to the ensemble loader instead of hard-coding 1528

jcapp4 · jcapp4 · commit b42edb2a5cb5 · 2025-12-12T17:25:51.000+01:00
diff --git a/chebai/preprocessing/datasets/base.py b/chebai/preprocessing/datasets/base.py
@@ -725,6 +725,7 @@ def __init__(
         self,
             ensemble=True,
             load_path=None,
+            dim = 1528,
         **kwargs,
     ):
         super(_DynamicDataset, self).__init__(**kwargs)
@@ -733,7 +734,7 @@ def __init__(
         self._dynamic_df_train = None
         self._dynamic_df_test = None
         self._dynamic_df_val = None
-        self.loader= Ensemble_loader(ensemble=ensemble,load_path=load_path)
+        self.loader= Ensemble_loader(ensemble=ensemble,load_path=load_path,dim=dim)
         # Path of csv file which contains a list of ids & their assignment to a dataset (either train,
         # validation or test).
         self.splits_file_path = self._validate_splits_file_path(
@@ -1188,7 +1189,7 @@ def load_processed_data(
 
                     if self.loader.ensemble:
 
-                        data = self.loader.add_val_weights(data)
+                        data = self.loader.add_val_weights(data,self.loader.dim)
                         if self.loader.load_path is not None:
 
                             data = self.loader.add_duplicates(data,self.loader.load_path)
@@ -1197,7 +1198,7 @@ def load_processed_data(
                         data = self.loader.add_train_weights(data,self.loader.load_path)
 
                 if kind == "validation" :
-                    data = self.loader.add_val_weights(data)
+                    data = self.loader.add_val_weights(data,self.loader.dim)
 
 
 
diff --git a/extras/adamh.py b/extras/adamh.py
@@ -11,9 +11,11 @@ def __init__(
             #True :bagging, False : boosting
             ensemble:bool,
             load_path:str,
+            dim:int,
     ):
         self.ensemble=ensemble
         self.load_path=load_path
+        self.dim=dim
 
 
 
@@ -26,14 +28,13 @@ def add_train_weights(self,ids,load_path):
             if it % 10000 == 0:
                 print(it)
             ident = i["ident"]
-            print(d[str(ident)])
             i["weight"] = d[str(ident)]
             it = it + 1
         return ids
 
-    def add_val_weights(self,ids):
+    def add_val_weights(self,ids,dim):
         for i in ids:
-            i["weight"] = [1]*1528
+            i["weight"] = [1]*dim
         return ids
     #dict reverse to the dict created by the method bootstrapping in sample.py
     def add_duplicates(self,data,load_path):
@@ -64,35 +65,35 @@ def create_data_weights(batchsize:int,dim:int,weights:dict[str,list[float,...]],
         index = index + 1
     return weight
 
-def create_weight(path_to_split="/home/programmer/Bachelorarbeit/split/splits.csv"):
-    weights = {}
-    with open(path_to_split, 'r') as csvfile:
-        reader = csv.reader(csvfile)
-        i = 0
-        for row in reader:
-            if (row[1] == "train") and i > 0:
-                #print(row[0])
-                weights[row[0]] = torch.full((1,1528),int(row[0]))
-                #print(row[0])
-            i = i +1
-        print(len(weights))
-    torch.save(weights,"/home/programmer/Bachelorarbeit/weights/init_mh.pt")
+# def create_weight(path_to_split="/home/programmer/Bachelorarbeit/split/splits.csv"):
+#     weights = {}
+#     with open(path_to_split, 'r') as csvfile:
+#         reader = csv.reader(csvfile)
+#         i = 0
+#         for row in reader:
+#             if (row[1] == "train") and i > 0:
+#                 #print(row[0])
+#                 weights[row[0]] = torch.full((1,1528),int(row[0]))
+#                 #print(row[0])
+#             i = i +1
+#         print(len(weights))
+#     torch.save(weights,"/home/programmer/Bachelorarbeit/weights/init_mh.pt")
 
 
 #for 1_ada_no_normal_weights weights =0.0001
-def new_create_weight(path_to_split="/home/programmer/Bachelorarbeit/split/splits.csv"):
+def new_create_weight(path_to_split="/home/programmer/Bachelorarbeit/split/reworked_splits.csv"):
     weights = {}
     with open(path_to_split, 'r') as csvfile:
         reader = csv.reader(csvfile)
         i = 0
         for row in reader:
             if (row[1] == "train") and i > 0:
                 # print(row[0])
-                weights[row[0]] = [1/(1528*160715)]* 1528
+                weights[row[0]] = [(1/(1528*160677))*10000]* 1528
                 # print(row[0])
             i = i + 1
         print(len(weights))
-    torch.save(weights, "/home/programmer/Bachelorarbeit/weights/init_mh.pt")
+    torch.save(weights, "/home/programmer/Bachelorarbeit/weights/init_mh_10000.pt")
 
 
 
@@ -114,4 +115,4 @@ def new_create_weight(path_to_split="/home/programmer/Bachelorarbeit/split/split
 
 
 #new_create_weight()
-#create_weight()
+