Sample selection update to featomic (#105)

HannaTuerk · web-flow · commit f7a606f7d586 · 2024-12-19T16:49:43.000+01:00
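* Update the sample-selection example from rascaline to featomic (new nested hyperparameter layout and renamed metadata labels)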

---------
diff --git a/examples/sample-selection/environment.yml b/examples/sample-selection/environment.yml
@@ -9,6 +9,6 @@ dependencies:
     - chemiscope>=0.7
     - matplotlib
     - metatensor
-    - rascaline @ git+https://github.com/Luthaf/rascaline@ca957642f512e141c7570e987aadc05c7ac71983
+    - featomic
     - skmatter
     - equisolve @ git+https://github.com/lab-cosmo/equisolve.git@c858bedef4b2799eb445e4c92535ee387224089a
diff --git a/examples/sample-selection/sample-selection.py b/examples/sample-selection/sample-selection.py
@@ -4,7 +4,7 @@
 
 :Authors: Davide Tisi `@DavideTisi <https://github.com/DavideTisi>`_
 
-In this tutorial we generate descriptors using rascaline, then select a subset
+In this tutorial we generate descriptors using featomic, then select a subset
 of structures using both the farthest-point sampling (FPS) and CUR algorithms
 implemented in scikit-matter. Finally, we also generate a selection of
 the most important features using the same techniques.
@@ -19,8 +19,8 @@
 import metatensor
 import numpy as np
 from equisolve.numpy import feature_selection, sample_selection
+from featomic import SoapPowerSpectrum
 from matplotlib import pyplot as plt
-from rascaline import SoapPowerSpectrum
 from sklearn.decomposition import PCA
 from skmatter import feature_selection as skfeat_selection
 
@@ -37,22 +37,25 @@
 frames = ase.io.read("input-fps.xyz", f":{n_frames}", format="extxyz")
 
 # %%
-# Compute SOAP descriptors using rascaline
+# Compute SOAP descriptors using featomic
 # ----------------------------------------
 #
-# First, define the rascaline hyperparameters used to compute SOAP.
+# First, define the featomic hyperparameters used to compute SOAP.
 
 
-# rascaline hyperparameters
+# featomic hyperparameters
 hypers = {
-    "cutoff": 6.0,
-    "max_radial": 8,
-    "max_angular": 6,
-    "atomic_gaussian_width": 0.3,
-    "cutoff_function": {"ShiftedCosine": {"width": 0.5}},
-    "radial_basis": {"Gto": {"accuracy": 1e-6}},
-    "radial_scaling": {"Willatt2018": {"exponent": 4, "rate": 1, "scale": 3.5}},
-    "center_atom_weight": 1.0,
+    "cutoff": {"radius": 6.0, "smoothing": {"type": "ShiftedCosine", "width": 0.5}},
+    "density": {
+        "type": "Gaussian",
+        "width": 0.3,
+        "scaling": {"type": "Willatt2018", "exponent": 4, "rate": 1, "scale": 3.5},
+    },
+    "basis": {
+        "type": "TensorProduct",
+        "max_angular": 6,
+        "radial": {"type": "Gto", "max_radial": 7},
+    },
 }
 
 # Generate a SOAP power spectrum
@@ -61,13 +64,13 @@
 
 
 # Makes a dense block
-atom_soap = rho2i.keys_to_properties(["species_neighbor_1", "species_neighbor_2"])
+atom_soap = rho2i.keys_to_properties(["neighbor_1_type", "neighbor_2_type"])
 
-atom_soap_single_block = atom_soap.keys_to_samples(keys_to_move=["species_center"])
+atom_soap_single_block = atom_soap.keys_to_samples(keys_to_move=["center_type"])
 
 # Sum over atomic centers to compute structure features
 struct_soap = metatensor.sum_over_samples(
-    atom_soap_single_block, sample_names=["center", "species_center"]
+    atom_soap_single_block, sample_names=["atom", "center_type"]
 )
 
 
@@ -119,13 +122,13 @@
 # Print the selected envs for each block
 print("atomic envs selected with FPS:\n")
 for key, block in selector_atomic_fps.support.items():
-    print("species_center:", key, "\n(struct_idx, atom_idx)\n", block.samples.values)
+    print("center_type:", key, "\n(struct_idx, atom_idx)\n", block.samples.values)
 
 selector_atomic_cur = sample_selection.CUR(n_to_select=n_envs).fit(atom_soap)
 # Print the selected envs for each block
 print("atomic envs selected with CUR:\n")
 for key, block in selector_atomic_cur.support.items():
-    print("species_center:", key, "\n(struct_idx, atom_idx)\n", block.samples.values)
+    print("center_type:", key, "\n(struct_idx, atom_idx)\n", block.samples.values)
 
 
 # %%
@@ -134,7 +137,7 @@
 #
 # One can also select from a combined pool of atomic environments and
 # structures, instead of selecting an equal number of atomic environments for
-# each chemical species. In this case, we can move the 'species_center' key to samples
+# each chemical species. In this case, we can move the 'center_type' key to samples
 # such that our descriptor is a TensorMap consisting of a single block. Upon
 # sample selection, the most diverse atomic environments will be selected,
 # regardless of their chemical species.
@@ -155,7 +158,7 @@
     atom_soap_single_block
 )
 print(
-    "atomic envs selected with FPS: \n (struct_idx, atom_idx, species_center) \n",
+    "atomic envs selected with FPS: \n (struct_idx, atom_idx, center_type) \n",
     selector_atomic_fps.support.block(0).samples.values,
 )