From f9cb05a76a457af4e2b26a255f362d83ed9c1165 Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 22 Apr 2025 13:37:26 +0200 Subject: [PATCH 01/12] Dump the optimized structure after minimization of relax as a "data.optimized" file. --- lammps_interface/lammps_main.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/lammps_interface/lammps_main.py b/lammps_interface/lammps_main.py index 5243831..3eee360 100755 --- a/lammps_interface/lammps_main.py +++ b/lammps_interface/lammps_main.py @@ -436,8 +436,13 @@ def split_graph(self): def assign_force_fields(self): - attr = {'graph':self.graph, 'cutoff':self.options.cutoff, 'h_bonding':self.options.h_bonding, - 'keep_metal_geometry':self.options.fix_metal, 'bondtype':self.options.dreid_bond_type} + attr = { + 'graph': self.graph, + 'cutoff': self.options.cutoff, + 'h_bonding': self.options.h_bonding, + 'keep_metal_geometry': self.options.fix_metal, + 'bondtype': self.options.dreid_bond_type} + param = getattr(ForceFields, self.options.force_field)(**attr) self.special_commands += param.special_commands() @@ -517,11 +522,12 @@ def compute_simulation_size(self): print("Use format") print("Exiting...") sys.exit() - self.supercell=supercell + + self.supercell = supercell if np.any(np.array(supercell) > 1): print("Re-sizing to a %i x %i x %i supercell. "%(supercell)) - #TODO(pboyd): apply to subgraphs as well, if requested. + # TO DO (pboyd): apply to subgraphs as well, if requested. self.graph.build_supercell(supercell, self.cell) molcount = 0 if self.subgraphs: @@ -1195,9 +1201,12 @@ def construct_input_file(self): inp_str += "%-15s %s\n"%("jump", "SELF loop_min") inp_str += "%-15s %s\n"%("label", "break_min") - # inp_str += "%-15s %s\n"%("unfix", "output") + # Write the final relaxed structure to a file + inp_str += "%-15s %s\n"%("write_data", "data.optimized") + + # inp_str += "%-15s %s\n"%("unfix", "output") # delete bond types etc, for molecules that are rigid - + if (self.options.thermal_anneal): box_min = "aniso" min_style = "cg" @@ -1267,6 +1276,9 @@ def construct_input_file(self): for key in sorted(self.unique_atom_types.keys())]))) inp_str += "run 0\n" + # Write the final relaxed structure to a file + inp_str += "%-15s %s\n"%("write_data", "data.optimized") + for mol in sorted(self.molecule_types.keys()): rep = self.subgraphs[self.molecule_types[mol][0]] if rep.rigid: From 519eec0b268dac0a1f0f1c90c2ab84c0530ef06c Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 22 Apr 2025 13:40:59 +0200 Subject: [PATCH 02/12] Add the `bond_types_from_cif` option to read the bond types from CIF and not reassign then. --- lammps_interface/structure_data.py | 245 ++++++++++++++++------------- 1 file changed, 136 insertions(+), 109 deletions(-) diff --git a/lammps_interface/structure_data.py b/lammps_interface/structure_data.py index 9293e70..1ef2bd2 100644 --- a/lammps_interface/structure_data.py +++ b/lammps_interface/structure_data.py @@ -42,6 +42,7 @@ DEG2RAD = np.pi / 180. MAX_RECURSION = 500 + class MolecularGraph(nx.Graph): """Contains all information relating a structure file to a fully described classical system. @@ -61,15 +62,15 @@ class MolecularGraph(nx.Graph): - force_field_type """ node_dict_factory = OrderedDict + def __init__(self, **kwargs): """MolecularGraph constructor.""" nx.Graph.__init__(self, **kwargs) # coordinates and distances will be kept in a matrix because # networkx edge and node lookup is slow. - try: - self.name = kwargs['name'] - except KeyError: - self.name = 'default' + + self.name = kwargs.get('name', 'default') + self.coordinates = None self.distance_matrix = None self.original_size = 0 @@ -80,22 +81,24 @@ def __init__(self, **kwargs): self.find_organic_sbus = False self.cell = None self.rigid = False - #TODO(pboyd): networkx edges do not store the nodes in order! + + self.bond_types_from_cif = kwargs.get('bond_types_from_cif', False) + self.check_distences = kwargs.get('check_distances', True) + + # TODO(pboyd): networkx edges do not store the nodes in order! # Have to keep a dictionary lookup to make sure the nodes # are referenced properly (particularly across periodic images) self.sorted_edge_dict = {} self.molecule_images = [] - def nodes_iter2(self, data=True): - #FIXME(pboyd): latest version of NetworkX has removed nodes_iter... + # FIXME(pboyd): latest version of NetworkX has removed nodes_iter... """Oh man, fixing to networkx 2.0 This probably breaks a lot of stuff in the code. THANKS NETWORKX!!!!!!!1 - Extensive testing under way... - + Extensive testing under way... """ - if(data): + if (data): for node in self.nodes(): data = self.nodes[node] yield (node, data) @@ -103,16 +106,15 @@ def nodes_iter2(self, data=True): for node in self.nodes(): yield node - def edges_iter2(self, data=True): - for (n1,n2) in self.edges(): - v1,v2 = self.sorted_edge_dict[(n1,n2)] - #d=self.edges[(n1,n2)] - d=self[n1][n2] - if(data): - yield (v1,v2,d) + for (n1, n2) in self.edges(): + v1, v2 = self.sorted_edge_dict[(n1, n2)] + # d=self.edges[(n1,n2)] + d = self[n1][n2] + if data: + yield (v1, v2, d) else: - yield (v1,v2) + yield (v1, v2) #for n1, n2, d in self.edges_iter(**kwargs): # yield (self.sorted_edge_dict[(n1, n2)][0], self.sorted_edge_dict[(n1,n2)][1], d) @@ -262,7 +264,7 @@ def add_atomic_node(self, **kwargs): #kwargs.update({'special_flag':None}) self.add_node(idx, **kwargs) - def compute_bonding(self, cell, scale_factor = 0.9): + def compute_bonding(self, cell, scale_factor: float = 0.9): """Computes bonds between atoms based on covalent radii.""" # here assume bonds exist, populate data with lengths and # symflags if needed. @@ -282,8 +284,8 @@ def compute_bonding(self, cell, scale_factor = 0.9): if nn2 == n1label: nn1 = n2 nn2 = n1 - self.sorted_edge_dict.update({(n1, n2):(nn1, nn2)}) - self.sorted_edge_dict.update({(n2, n1):(nn1, nn2)}) + self.sorted_edge_dict.update({(n1, n2): (nn1, nn2)}) + self.sorted_edge_dict.update({(n2, n1): (nn1, nn2)}) except KeyError: pass try: @@ -291,8 +293,8 @@ def compute_bonding(self, cell, scale_factor = 0.9): if nn2 == n1label: nn1 = n2 nn2 = n1 - self.sorted_edge_dict.update({(n2, n1):(nn1, nn2)}) - self.sorted_edge_dict.update({(n1, n2):(nn1, nn2)}) + self.sorted_edge_dict.update({(n2, n1): (nn1, nn2)}) + self.sorted_edge_dict.update({(n1, n2): (nn1, nn2)}) except KeyError: pass @@ -300,7 +302,7 @@ def compute_bonding(self, cell, scale_factor = 0.9): bl = data['length'] if bl <= 0.01: id1, id2 = self.nodes[n1]['index']-1, self.nodes[n2]['index']-1 - dist = self.distance_matrix[id1,id2] + dist = self.distance_matrix[id1, id2] data['length'] = dist if (set(sf) == set(['.'])): @@ -314,12 +316,11 @@ def compute_bonding(self, cell, scale_factor = 0.9): # covalent radii. for n1, n2 in itertools.combinations(self.nodes(), 2): node1, node2 = self.nodes[n1], self.nodes[n2] - e1, e2 = node1['element'],\ - node2['element'] + e1, e2 = node1['element'], node2['element'] elements = set([e1, e2]) - i1,i2 = node1['index']-1, node2['index']-1 + i1, i2 = node1['index'] - 1, node2['index'] - 1 rad = (COVALENT_RADII[e1] + COVALENT_RADII[e2]) - dist = self.distance_matrix[i1,i2] + dist = self.distance_matrix[i1, i2] tempsf = scale_factor # probably a better way to fix these kinds of issues.. if (set("F") < elements) and (elements & metals): @@ -330,36 +331,36 @@ def compute_bonding(self, cell, scale_factor = 0.9): # fix for water particle recognition. if(set(["O", "H"]) <= elements): tempsf = 0.8 - # fix for M-NDISA MOFs + # fix for M-NDISA MOFs if(set(["O", "C"]) <= elements): tempsf = 0.8 if (set("O") < elements) and (elements & metals): tempsf = 0.82 - + # very specific fix for Michelle's amine appended MOF - if(set(["N","H"]) <= elements): + if(set(["N", "H"]) <= elements): tempsf = 0.67 - if(set(["Mg","N"]) <= elements): + if(set(["Mg", "N"]) <= elements): tempsf = 0.80 - if(set(["C","H"]) <= elements): + if(set(["C", "H"]) <= elements): tempsf = 0.80 if dist*tempsf < rad and not (alkali & elements): flag = self.compute_bond_image_flag(n1, n2, cell) - self.sorted_edge_dict.update({(n1,n2): (n1, n2), (n2, n1):(n1, n2)}) + self.sorted_edge_dict.update({(n1, n2): (n1, n2), (n2, n1): (n1, n2)}) self.add_edge(n1, n2, key=self.number_of_edges() + 1, order=1.0, weight=1, length=dist, - symflag = flag, - potential = None + symflag=flag, + potential=None ) #TODO(pboyd) update this def compute_bond_image_flag(self, n1, n2, cell): """Update bonds to contain bond type, distances, and min img shift.""" supercells = np.array(list(itertools.product((-1, 0, 1), repeat=3))) - unit_repr = np.array([5,5,5], dtype=int) + unit_repr = np.array([5, 5, 5], dtype=int) atom1 = self.nodes[n1] atom2 = self.nodes[n2] #coord1 = self.coordinates[atom1['index']-1] @@ -458,9 +459,8 @@ def compute_dihedral_between(self, a, b, c, d): def add_bond_edge(self, **kwargs): """Add bond edges (weight factor = 1)""" - #TODO(pboyd) should figure out if there are other cif keywords to identify - # atom types - #TODO(pboyd) this is .cif specific and should be contained within the cif + # TODO(pboyd) should figure out if there are other cif keywords to identify atom types + # TODO(pboyd) this is .cif specific and should be contained within the cif # file reading portion of the code. This is so that other file formats # can eventually be adopted if need be. @@ -481,20 +481,20 @@ def add_bond_edge(self, **kwargs): except KeyError: # assume bond does not straddle a periodic boundary flag = '.' - kwargs.update({'length':length}) + kwargs.update({'length': length}) kwargs.update({'weight': 1}) kwargs.update({'order': order}) kwargs.update({'symflag': flag}) kwargs.update({'potential': None}) # get the node index to avoid headaches - for k,data in self.nodes_iter2(data=True): + for k, data in self.nodes_iter2(data=True): if data['ciflabel'] == n1: n1 = k elif data['ciflabel'] == n2: - n2 =k + n2 = k - self.sorted_edge_dict.update({(n1,n2): (n1, n2), (n2, n1):(n1, n2)}) - self.add_edge(n1, n2, key=self.number_of_edges()+1, **kwargs) + self.sorted_edge_dict.update({(n1, n2): (n1, n2), (n2, n1): (n1, n2)}) + self.add_edge(n1, n2, key=self.number_of_edges() + 1, **kwargs) def compute_cartesian_coordinates(self, cell): """Compute the cartesian coordinates for each atom node""" @@ -510,11 +510,11 @@ def compute_cartesian_coordinates(self, cell): coordinates = np.dot(coordinates, cell.cell) data.update({'cartesian_coordinates':coordinates}) - self.coordinates[data['index']-1] = coordinates + self.coordinates[data['index'] - 1] = coordinates - def compute_min_img_distances(self, cell): + def compute_min_img_distances(self, cell, check_distances: bool = True): self.distance_matrix = np.empty((self.number_of_nodes(), self.number_of_nodes())) - + tmp_one = np.empty((self.number_of_nodes(), 3)) #mp_one_cell = np.empty((self.number_of_nodes(), 3)) for n1 in self.nodes(): @@ -524,19 +524,18 @@ def compute_min_img_distances(self, cell): #mp_one_cell[id1,:] = np.dot(tmp_one[id1,:], cell.cell) for n1, n2 in itertools.combinations(self.nodes(), 2): - id1, id2 = self.nodes[n1]['index']-1,\ - self.nodes[n2]['index']-1 - #coords1, coords2 = self.coordinates[id1], self.coordinates[id2] - coords1, coords2 = self.nodes[n1]['cartesian_coordinates'], self.nodes[n2]['cartesian_coordinates'] + id1, id2 = self.nodes[n1]['index'] - 1, self.nodes[n2]['index'] - 1 + delta = tmp_one[id1,:] - tmp_one[id2,:] three0 = np.around(delta) four0 = np.dot(delta - three0, cell.cell) dist = np.linalg.norm(four0) - + # # perform a distance check here and break with error. - if dist < 0.1: - print("ERROR: distance between atom %i and %i are less than 0.1 Angstrom in the unit cell!" - "Please check your input file for overlapping atoms."%(n1, n2)) + if dist < 0.1 and check_distances: + print( + f"ERROR: distance between atom {n1} and {n2} are less than 0.1 Angstrom in the unit cell!", + "Please check your input file for overlapping atoms.") exit() self.distance_matrix[id1][id2] = dist @@ -563,13 +562,11 @@ def min_img_distance(self, coords1, coords2, cell): return np.linalg.norm(four) def compute_init_typing(self): - """Find possible rings in the structure and - initialize the hybridization for each atom. - More refined determinations of atom and bond types - is computed below in compute_bond_typing + """Find possible rings in the structure and initialize the hybridization for each atom. + More refined determinations of atom and bond types is computed below in compute_bond_typing """ - #TODO(pboyd) return if atoms already 'typed' in the .cif file + # TODO(pboyd) return if atoms already 'typed' in the .cif file # compute and store cycles cycles = [] cycle_shortest_path_cutoff = 10 @@ -600,6 +597,10 @@ def compute_init_typing(self): if element == "C": if self.degree(label) >= 4: self.nodes[label].update({'hybridization':'sp3'}) + # Special case for carboxylate groups add as workaround for + # when the bond types are available in the cif file. + elif self.degree(label) == 3 and ('O' in [self.nodes[k]['element'] for k in neighbours]): + self.nodes[label].update({'hybridization':'aromatic'}) elif self.degree(label) == 3: self.nodes[label].update({'hybridization':'sp2'}) elif self.degree(label) <= 2: @@ -620,7 +621,10 @@ def compute_init_typing(self): # there's probably many cases where this fails, # but carboxylate groups, bridging hydroxy groups # make this true. - if (n_elems <= metals): + # Workaround for the carboxylate groups to be rigid + if ('C' in n_elems): + self.nodes[label].update({'hybridization': 'aromatic'}) + elif (n_elems <= metals): self.nodes[label].update({'hybridization': 'sp2'}) else: self.nodes[label].update({'hybridization':'sp3'}) @@ -652,14 +656,13 @@ def compute_init_typing(self): self.nodes[a]['cycle'] = True self.nodes[a]['rings'].append(cycle) - def compute_bond_typing(self): - """ Compute bond types and atom types based on the local edge - environment. - Messy, loads of 'ifs' - is there a better way to catch chemical features? + """ Compute bond types and atom types based on the local edge environment. + If bond types are already defined in the cif file, use that information. + + Messy, loads of 'ifs': is there a better way to catch chemical features? """ - #TODO(pboyd) return if bonds already 'typed' in the .cif file + double_check = [] for n1, n2, data in self.edges_iter2(data=True): elements = [self.nodes[a]['element'] for a in (n1,n2)] @@ -994,27 +997,29 @@ def compute_topology_information(self, cell, tol, num_neighbours): print("compute_topology_information()") self.compute_cartesian_coordinates(cell) print("func: {}; Elps. {:.3f}s".format("cartesian_coordinates", clock() - t0)) - + self.compute_min_img_distances(cell) print("func: {}; Elps. {:.3f}s".format("min_img_distances", clock() - t0)) - + self.compute_bonding(cell) print("func: {}; Elps. {:.3f}s".format("compute_bonding", clock() - t0)) - + self.compute_init_typing() print("func: {}; Elps. {:.3f}s".format("init_typing", clock() - t0)) - - self.compute_bond_typing() - print("func: {}; Elps. {:.3f}s".format("bond_typing", clock() - t0)) + + # If the bond types area available in the cif file, use them. + if not self.bond_types_from_cif: + self.compute_bond_typing() + print("func: {}; Elps. {:.3f}s".format("bond_typing", clock() - t0)) if (self.find_metal_sbus): self.detect_clusters(num_neighbours, tol) # num neighbors determines how many nodes from the metal element to cut out for comparison print("func: {}; Elps. {:.3f}s".format("detect_clusters", clock() - t0)) - + if (self.find_organic_sbus): self.detect_clusters(num_neighbours, tol, type="Organic") print("func: {}; Elps. {:.3f}s".format("detect_clusters", clock() - t0)) - + self.compute_angles() print("func: {}; Elps. {:.3f}s".format("angles", clock() - t0)) @@ -1574,27 +1579,44 @@ def __or__(self, graph): cliques.sort(key=len) return cliques[-1] + def del_parenth(string): return re.sub(r'\([^)]*\)', '' , string) -def ase_from_CIF(cifname): + +def ase_from_CIF(cifname, **kwargs): ''' - Try to read the cif file using the ase environment. They have considerations for space groups. - We don't. + Try to read the cif file using the ase environment. They have considerations for space groups. + We don't. ''' from ase.io import read ase_cif = read(cifname) - mg = MolecularGraph(name=clean(cifname)) + mg = MolecularGraph( + name=clean(cifname), + bond_types_from_cif=kwargs.get('bond_types_from_cif', False), + check_distances=kwargs.get('check_distances', True) + ) for atom in ase_cif: print(atom) -def from_CIF(cifname): + +def from_CIF(cifname, **kwargs): """Reads the structure data from the CIF - currently does not read the symmetry of the cell - does not unpack the assymetric unit (assumes P1) - assumes that the appropriate keys are in the cifobj (no error checking) + Parameters + ---------- + cifname : str + The name of the cif file to read + bond_types_from_cif : bool, optional + If True, the bond types are read from the cif file. Default is False. + check_distances : bool, optional + If True, the distances are checked and if de distance is less than 0.1 A + the code finishes exectution. Default is True. + """ cifobj = CIF() @@ -1604,57 +1626,64 @@ def from_CIF(cifname): # obtain atoms and cell cell = Cell() # add data to molecular graph (to be parsed later..) - mg = MolecularGraph(name=clean(cifname)) - cellparams = [float(del_parenth(i)) for i in [data['_cell_length_a'], - data['_cell_length_b'], - data['_cell_length_c'], - data['_cell_angle_alpha'], - data['_cell_angle_beta'], - data['_cell_angle_gamma']]] + mg = MolecularGraph( + name=clean(cifname), + bond_types_from_cif=kwargs.get('bond_types_from_cif', False), + check_distances=kwargs.get('check_distances', True), + ) + + cellparams = [float(del_parenth(i)) for i in [ + data['_cell_length_a'], + data['_cell_length_b'], + data['_cell_length_c'], + data['_cell_angle_alpha'], + data['_cell_angle_beta'], + data['_cell_angle_gamma']] + ] + cell.set_params(cellparams) - #add atom nodes + # Add atom nodes id = cifobj.block_order.index('atoms') atheads = cifobj._headings[id] + for atom_data in zip(*[data[i] for i in atheads]): - kwargs = {a:j.strip() for a, j in zip(atheads, atom_data)} + kwargs = {a: j.strip() for a, j in zip(atheads, atom_data)} mg.add_atomic_node(**kwargs) # add bond edges, if they exist try: id = cifobj.block_order.index('bonds') bondheads = cifobj._headings[id] for bond_data in zip(*[data[i] for i in bondheads]): - kwargs = {a:j.strip() for a, j in zip(bondheads, bond_data)} + kwargs = {a: j.strip() for a, j in zip(bondheads, bond_data)} mg.add_bond_edge(**kwargs) - except: + + except Exception as e: # catch no bonds print("No bonds reported in cif file - computing bonding..") + print(e) + mg.store_original_size() mg.cell = cell print("totatomlen =", len(mg._node)) return cell, mg -def write_CIF(graph, cell): + +def write_CIF(graph, cell, path=''): """Currently used for debugging purposes""" c = CIF(name="%s.debug"%graph.name) # data block c.add_data("data", data_=graph.name) - c.add_data("data", _audit_creation_date= - CIF.label(c.get_time())) - c.add_data("data", _audit_creation_method= - CIF.label("Lammps Interface v.%s"%(str(0)))) + c.add_data("data", _audit_creation_date=CIF.label(c.get_time())) + c.add_data("data", _audit_creation_method=CIF.label("Lammps Interface v.%s"%(str(0)))) # sym block - c.add_data("sym", _symmetry_space_group_name_H_M= - CIF.label("P1")) - c.add_data("sym", _symmetry_Int_Tables_number= - CIF.label("1")) - c.add_data("sym", _symmetry_cell_setting= - CIF.label("triclinic")) + c.add_data("sym", _symmetry_space_group_name_H_M=CIF.label("P1")) + c.add_data("sym", _symmetry_Int_Tables_number=CIF.label("1")) + c.add_data("sym", _symmetry_cell_setting=CIF.label("triclinic")) # sym loop block - c.add_data("sym_loop", _symmetry_equiv_pos_as_xyz= - CIF.label("'x, y, z'")) + c.add_data("sym_loop", _symmetry_equiv_pos_as_xyz=CIF.label("'x, y, z'")) # cell block c.add_data("cell", _cell_length_a=CIF.cell_length_a(cell.a)) @@ -1720,7 +1749,7 @@ def write_CIF(graph, cell): CIF.ccdc_geom_bond_type(type)) print('Output cif file written to %s.cif'%c.name) - file = open("%s.cif"%c.name, "w") + file = open(os.path.join(path, "%s.cif"%c.name), "w") file.writelines(str(c)) file.close() @@ -1799,7 +1828,6 @@ def write_PDB(graph, cell): except IndexError: neighbour4 = "%5s"%(" ") - conect_string += "%6s%5s%5s%5s%5s%5s\n"%( "CONECT", # 1 - 6: CONECT node, # 7 - 11: atom serial number @@ -1907,8 +1935,7 @@ def write_RASPA_CIF(graph, cell,classifier=0): file.close() - -def write_RASPA_sim_files(lammps_sim,classifier=0): +def write_RASPA_sim_files(lammps_sim, classifier=0): """ Write the RASPA pseudo_atoms.def file for this MOF All generic adsorbates info automatically included From 291617771587c9b0a5ce0ba306731725166827c4 Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 22 Apr 2025 13:43:12 +0200 Subject: [PATCH 03/12] Set bond_types_from_cif to False when no bonds are reported in CIF file --- lammps_interface/structure_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lammps_interface/structure_data.py b/lammps_interface/structure_data.py index 1ef2bd2..4b5e9bb 100644 --- a/lammps_interface/structure_data.py +++ b/lammps_interface/structure_data.py @@ -1661,7 +1661,7 @@ def from_CIF(cifname, **kwargs): except Exception as e: # catch no bonds print("No bonds reported in cif file - computing bonding..") - print(e) + mg.bond_types_from_cif = False mg.store_original_size() mg.cell = cell From 76832f7d47a17ff74c63f037115c111ae1863421 Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 22 Apr 2025 13:54:55 +0200 Subject: [PATCH 04/12] Refactor from_CIF function parameters for clarity --- lammps_interface/structure_data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lammps_interface/structure_data.py b/lammps_interface/structure_data.py index 4b5e9bb..80581af 100644 --- a/lammps_interface/structure_data.py +++ b/lammps_interface/structure_data.py @@ -512,7 +512,7 @@ def compute_cartesian_coordinates(self, cell): self.coordinates[data['index'] - 1] = coordinates - def compute_min_img_distances(self, cell, check_distances: bool = True): + def compute_min_img_distances(self, cell): self.distance_matrix = np.empty((self.number_of_nodes(), self.number_of_nodes())) tmp_one = np.empty((self.number_of_nodes(), 3)) @@ -532,7 +532,7 @@ def compute_min_img_distances(self, cell, check_distances: bool = True): dist = np.linalg.norm(four0) # # perform a distance check here and break with error. - if dist < 0.1 and check_distances: + if dist < 0.1 and self.check_distances: print( f"ERROR: distance between atom {n1} and {n2} are less than 0.1 Angstrom in the unit cell!", "Please check your input file for overlapping atoms.") @@ -1601,7 +1601,7 @@ def ase_from_CIF(cifname, **kwargs): print(atom) -def from_CIF(cifname, **kwargs): +def from_CIF(cifname, bond_types_from_cif=False, check_distances=True): """Reads the structure data from the CIF - currently does not read the symmetry of the cell - does not unpack the assymetric unit (assumes P1) @@ -1628,8 +1628,8 @@ def from_CIF(cifname, **kwargs): # add data to molecular graph (to be parsed later..) mg = MolecularGraph( name=clean(cifname), - bond_types_from_cif=kwargs.get('bond_types_from_cif', False), - check_distances=kwargs.get('check_distances', True), + bond_types_from_cif=bond_types_from_cif, + check_distances=check_distances, ) cellparams = [float(del_parenth(i)) for i in [ From 7e10c6025405c9d67f694223a048de28e103335b Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 22 Apr 2025 14:46:03 +0200 Subject: [PATCH 05/12] Finishes the implementation of `ase_from_CIF` method to read the cif file with ASE library --- lammps_interface/structure_data.py | 38 ++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/lammps_interface/structure_data.py b/lammps_interface/structure_data.py index 80581af..433598a 100644 --- a/lammps_interface/structure_data.py +++ b/lammps_interface/structure_data.py @@ -508,7 +508,7 @@ def compute_cartesian_coordinates(self, cell): except KeyError: coordinates = np.array([float(del_parenth(data[i])) for i in fcoord_keys]) coordinates = np.dot(coordinates, cell.cell) - data.update({'cartesian_coordinates':coordinates}) + data.update({'cartesian_coordinates': coordinates}) self.coordinates[data['index'] - 1] = coordinates @@ -1008,7 +1008,7 @@ def compute_topology_information(self, cell, tol, num_neighbours): print("func: {}; Elps. {:.3f}s".format("init_typing", clock() - t0)) # If the bond types area available in the cif file, use them. - if not self.bond_types_from_cif: + if self.bond_types_from_cif is False: self.compute_bond_typing() print("func: {}; Elps. {:.3f}s".format("bond_typing", clock() - t0)) @@ -1584,21 +1584,43 @@ def del_parenth(string): return re.sub(r'\([^)]*\)', '' , string) -def ase_from_CIF(cifname, **kwargs): +def ase_from_CIF(cifname, check_distances=True): ''' Try to read the cif file using the ase environment. They have considerations for space groups. We don't. + ASE looses the information of partial charges, custom atom labels and bond types. + This may be a problem, since we need to know the bond types to build the force field. ''' from ase.io import read ase_cif = read(cifname) + print(len(ase_cif)) + mg = MolecularGraph( name=clean(cifname), - bond_types_from_cif=kwargs.get('bond_types_from_cif', False), - check_distances=kwargs.get('check_distances', True) + check_distances=check_distances ) + for atom in ase_cif: - print(atom) + kwargs = { + '_atom_site_label': atom.symbol + str(atom.index), + '_atom_site_type_symbol': atom.symbol, + '_atom_site_fract_x': str(atom.scaled_position[0]), + '_atom_site_fract_y': str(atom.scaled_position[1]), + '_atom_site_fract_z': str(atom.scaled_position[2]), + '_atom_site_x': str(atom.position[0]), + '_atom_site_y': str(atom.position[1]), + '_atom_site_z': str(atom.position[2]), + } + mg.add_atomic_node(**kwargs) + + mg.store_original_size() + + cell = Cell() + cell.set_params(ase_cif.cell.cellpar()) + mg.cell = cell + print("totatomlen =", len(mg._node)) + return cell, mg def from_CIF(cifname, bond_types_from_cif=False, check_distances=True): @@ -1649,7 +1671,9 @@ def from_CIF(cifname, bond_types_from_cif=False, check_distances=True): for atom_data in zip(*[data[i] for i in atheads]): kwargs = {a: j.strip() for a, j in zip(atheads, atom_data)} + mg.add_atomic_node(**kwargs) + # add bond edges, if they exist try: id = cifobj.block_order.index('bonds') @@ -1658,7 +1682,7 @@ def from_CIF(cifname, bond_types_from_cif=False, check_distances=True): kwargs = {a: j.strip() for a, j in zip(bondheads, bond_data)} mg.add_bond_edge(**kwargs) - except Exception as e: + except: # catch no bonds print("No bonds reported in cif file - computing bonding..") mg.bond_types_from_cif = False From 5b0427dd5dafa87a760e845d32704ae8de0effd2 Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 22 Apr 2025 15:01:58 +0200 Subject: [PATCH 06/12] Fix typo in variable name for distance checking in MolecularGraph initialization --- lammps_interface/structure_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lammps_interface/structure_data.py b/lammps_interface/structure_data.py index 433598a..69047c4 100644 --- a/lammps_interface/structure_data.py +++ b/lammps_interface/structure_data.py @@ -83,7 +83,7 @@ def __init__(self, **kwargs): self.rigid = False self.bond_types_from_cif = kwargs.get('bond_types_from_cif', False) - self.check_distences = kwargs.get('check_distances', True) + self.check_distances = kwargs.get('check_distances', True) # TODO(pboyd): networkx edges do not store the nodes in order! # Have to keep a dictionary lookup to make sure the nodes From d372f8ef0a2d32bda4a49593a99315fe3c50a2b5 Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 13 Jan 2026 11:08:56 +0100 Subject: [PATCH 07/12] Improve trajectory writing to include atom types --- lammps_interface/lammps_main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lammps_interface/lammps_main.py b/lammps_interface/lammps_main.py index 3eee360..1f49745 100755 --- a/lammps_interface/lammps_main.py +++ b/lammps_interface/lammps_main.py @@ -1146,8 +1146,10 @@ def construct_input_file(self): " ".join([self.unique_atom_types[key][1]['element'] for key in sorted(self.unique_atom_types.keys())]))) elif self.options.dump_lammpstrj: - inp_str += "%-15s %s\n"%("dump","%s_lammpstrj all atom %i %s_mov.lammpstrj"% - (self.name, self.options.dump_lammpstrj, self.name)) + inp_str += "%-15s %s\n"%("dump","%s_lammpstrj all custom %i %s_mov.lammpstrj id element xs ys zs"% (self.name, self.options.dump_lammpstrj, self.name)) + inp_str += "%-15s %s\n"%("dump_modify", "%s_lammpstrj sort id"% (self.name)) + inp_str += "%-15s %s\n"%("dump_modify", "%s_lammpstrj element "% (self.name, " ".join([self.unique_atom_types[key][1]['element'] + for key in sorted(self.unique_atom_types.keys())]))) # in the meantime we need to map atom id to element that will allow us to # post-process the lammpstrj file and create a cif out of each From c4cc23a3bd9b1924e2324dd0ba08d4279ccb1256 Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 13 Jan 2026 11:15:42 +0100 Subject: [PATCH 08/12] Refactor dump_modify command to improve readability of element mapping in Lammps input string --- lammps_interface/lammps_main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lammps_interface/lammps_main.py b/lammps_interface/lammps_main.py index 1f49745..c350890 100755 --- a/lammps_interface/lammps_main.py +++ b/lammps_interface/lammps_main.py @@ -1147,9 +1147,10 @@ def construct_input_file(self): for key in sorted(self.unique_atom_types.keys())]))) elif self.options.dump_lammpstrj: inp_str += "%-15s %s\n"%("dump","%s_lammpstrj all custom %i %s_mov.lammpstrj id element xs ys zs"% (self.name, self.options.dump_lammpstrj, self.name)) + + elements = " ".join([self.unique_atom_types[key][1]['element'] for key in sorted(self.unique_atom_types.keys())]) inp_str += "%-15s %s\n"%("dump_modify", "%s_lammpstrj sort id"% (self.name)) - inp_str += "%-15s %s\n"%("dump_modify", "%s_lammpstrj element "% (self.name, " ".join([self.unique_atom_types[key][1]['element'] - for key in sorted(self.unique_atom_types.keys())]))) + inp_str += "%-15s %s\n"%("dump_modify", "%s_lammpstrj element %s"% (self.name, elements)) # in the meantime we need to map atom id to element that will allow us to # post-process the lammpstrj file and create a cif out of each From b556817c2947afa357362c542e693a6a9d49cefe Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 13 Jan 2026 15:14:10 +0100 Subject: [PATCH 09/12] Refactor optimization on minimize --- lammps_interface/lammps_main.py | 106 ++++++++++++++------------------ 1 file changed, 47 insertions(+), 59 deletions(-) diff --git a/lammps_interface/lammps_main.py b/lammps_interface/lammps_main.py index c350890..558c0b9 100755 --- a/lammps_interface/lammps_main.py +++ b/lammps_interface/lammps_main.py @@ -1161,54 +1161,68 @@ def construct_input_file(self): f.close() if (self.options.minimize): - box_min = "aniso" + # This part has been heavily modified by Felipe Lopes de Oliveira (FLO) + box_min = "tri" min_style = "cg" - min_eval = 1e-6 # HKUST-1 will not minimize past 1e-11 - max_iterations = 100000 # if the minimizer can't reach a minimum in this many steps, - # change the min_eval to something higher. - #inp_str += "%-15s %s\n"%("min_style","fire") - #inp_str += "%-15s %i %s\n"%("compute", 1, "all msd com yes") - #inp_str += "%-15s %-10s %s\n"%("variable", "Dx", "equal c_1[1]") - #inp_str += "%-15s %-10s %s\n"%("variable", "Dy", "equal c_1[2]") - #inp_str += "%-15s %-10s %s\n"%("variable", "Dz", "equal c_1[3]") - #inp_str += "%-15s %-10s %s\n"%("variable", "MSD", "equal c_1[4]") - #inp_str += "%-15s %s %s\n"%("fix", "output all print 1", "\"$(vol),$(cella),$(cellb),$(cellc),${Dx},${Dy},${Dz},${MSD}\"" + - # " file %s.min.csv title \"Vol,CellA,CellB,CellC,Dx,Dy,Dz,MSD\" screen no"%(self.name)) - inp_str += "%-15s %s\n"%("min_style", min_style) - inp_str += "%-15s %s\n"%("print", "\"MinStep,CellMinStep,AtomMinStep,FinalStep,Energy,EDiff\"" + - " file %s.min.csv screen no"%(self.name)) + min_eval = self.options.min_eval # HKUST-1 will not minimize past 1e-11 + max_iterations = 100000 # if the minimizer can't reach a minimum in this many steps, change the min_eval to something higher. + + inp_str += "#### Start of optimization ####\n\n" + inp_str += "%-15s %-10s %s\n"%("variable", "natoms", "equal count(all)") + + inp_str += "%-15s %s\n"%("print", "\"Iter,PeTotal,DiffPe,PePerAtom,DiffPerAtom,Vol,F_max,F_Norm\"" + + " file %s.min.csv screen no\n"%(self.name)) + + inp_str += "# Loop Initiation\n\n" + inp_str += "%-15s %-10s %s\n"%("variable", "min_eval", "equal %.2e"%(min_eval)) inp_str += "%-15s %-10s %s\n"%("variable", "prev_E", "equal %.2f"%(50000.)) # set unreasonably high for first loop + inp_str += "%-15s %-10s %s\n"%("variable", "prev_E_atom", "equal %.2f"%(50000.)) # set unreasonably high for first loop inp_str += "%-15s %-10s %s\n"%("variable", "iter", "loop %i"%(max_iterations)) + + inp_str += "%-15s %-10s %s\n"%("variable", "F_max_comp", "equal fmax") + inp_str += "%-15s %-10s %s\n"%("variable", "F_norm_max", "equal fnorm") + inp_str += "%-15s %s\n"%("label", "loop_min") fix = self.fixcount() - inp_str += "%-15s %s\n"%("min_style", min_style) - inp_str += "%-15s %s\n"%("fix","%i all box/relax %s 0.0 vmax 0.01"%(fix, box_min)) - inp_str += "%-15s %s\n"%("minimize","1.0e-15 1.0e-15 10000 100000") - inp_str += "%-15s %s\n"%("unfix", "%i"%fix) - inp_str += "%-15s %s\n"%("min_style", "fire") - inp_str += "%-15s %-10s %s\n"%("variable", "tempstp", "equal $(step)") - inp_str += "%-15s %-10s %s\n"%("variable", "CellMinStep", "equal ${tempstp}") - inp_str += "%-15s %s\n"%("minimize","1.0e-15 1.0e-15 10000 100000") - inp_str += "%-15s %-10s %s\n"%("variable", "AtomMinStep", "equal $(step)") - inp_str += "%-15s %-10s %s\n"%("variable", "temppe", "equal $(pe)") - inp_str += "%-15s %-10s %s\n"%("variable", "min_E", "equal abs(${prev_E}-${temppe})") - inp_str += "%-15s %s\n"%("print", "\"${iter},${CellMinStep},${AtomMinStep},${AtomMinStep}," + - "$(pe),${min_E}\"" + + + inp_str += "\n # --- Stage 1: Box Relaxation ---\n" + + inp_str += " %-15s %s\n"%("min_style", min_style) + inp_str += " %-15s %s\n"%("fix","%i all box/relax %s 0.0 vmax 0.01"%(fix, box_min)) + inp_str += " %-15s %s\n"%("minimize","1.0e-8 1.0e-8 5000 10000") + inp_str += " %-15s %s\n"%("unfix", "%i"%fix) + + inp_str += "\n # --- Stage 2: Atom Relaxation ---\n" + inp_str += " %-15s %s\n"%("min_style", "fire") + inp_str += " %-15s %-10s %s\n"%("variable", "tempstp", "equal $(step)") + inp_str += " %-15s %-10s %s\n"%("variable", "CellMinStep", "equal ${tempstp}") + inp_str += " %-15s %s\n"%("minimize","1.0e-10 1.0e-10 10000 20000") + + inp_str += "\n # --- Convergence Check (Per Atom) ---\n" + inp_str += " %-15s %-10s %s\n"%("variable", "curr_pe", "equal pe") + inp_str += " %-15s %-10s %s\n"%("variable", "curr_pe_atom", "equal v_curr_pe / v_natoms") + inp_str += " %-15s %-10s %s\n"%("variable", "vol", "equal vol") + inp_str += " %-15s %-10s %s\n"%("variable", "diff_total", "equal abs(${prev_E}-${curr_pe})") + inp_str += " %-15s %-10s %s\n"%("variable", "diff_atom", "equal abs(${prev_E_atom}-${curr_pe_atom})") + + inp_str += " %-15s %s\n\n"%("print", "\"${iter},${curr_pe},${diff_total},${curr_pe_atom},${diff_atom},${vol},${F_max_comp},${F_norm_max}\"" + " append %s.min.csv screen no"%(self.name)) - inp_str += "%-15s %s\n"%("if","\"${min_E} < ${min_eval}\" then \"jump SELF break_min\"") - inp_str += "%-15s %-10s %s\n"%("variable", "prev_E", "equal ${temppe}") - inp_str += "%-15s %s\n"%("next", "iter") + inp_str += " %-15s %-10s %s\n"%("variable", "prev_E", "equal ${curr_pe}") + inp_str += " %-15s %-10s %s\n\n"%("variable", "prev_E_atom", "equal ${curr_pe_atom}") + + inp_str += " %-15s %s\n"%("if","\"${diff_atom} < ${min_eval}\" then \"jump SELF break_min\"") + + inp_str += " %-15s %s\n"%("next", "iter") inp_str += "%-15s %s\n"%("jump", "SELF loop_min") inp_str += "%-15s %s\n"%("label", "break_min") + inp_str += "\n#### End of optimization ####\n\n" # Write the final relaxed structure to a file inp_str += "%-15s %s\n"%("write_data", "data.optimized") - # inp_str += "%-15s %s\n"%("unfix", "output") - # delete bond types etc, for molecules that are rigid if (self.options.thermal_anneal): box_min = "aniso" @@ -1256,32 +1270,6 @@ def construct_input_file(self): for key in sorted(self.unique_atom_types.keys())]))) inp_str += "run 0\n" - if (self.options.relax): - box_min = "aniso" - min_style = "cg" - inp_str += "run 0\n" - inp_str += "%-15s %-10s %s\n"%("variable", "inputpe", "equal $(pe)") - inp_str += "%-15s %s\n"%("min_style", min_style) - fix = self.fixcount() - inp_str += "%-15s %-10s %s\n"%("variable", "tempstp", "equal $(step)") - inp_str += "%-15s %s\n"%("minimize","1.0e-15 1.0e-15 10000 100000") - inp_str += "%-15s %-10s %s\n"%("variable", "AtomMinStep", "equal $(step)") - inp_str += "%-15s %-10s %s\n"%("variable", "relaxedpe", "equal $(pe)") - inp_str += "%-15s %-10s %s\n"%("variable", "min_E", "equal abs(${inputpe}-${relaxedpe})") - inp_str += "%-15s %s\n"%("print", "\"${AtomMinStep}," + - "$(pe),${min_E}\"" + - " append %s.min.csv screen no"%(self.name)) - inp_str += "%-15s %s\n"%("dump","%s_relax all custom 1 relaxed_%s.lammpstrj element xs ys zs"% - (self.name, self.name)) - inp_str += "%-15s %s\n"%("dump_modify", "%s_relax element %s"%( - self.name, - " ".join([self.unique_atom_types[key][1]['element'] - for key in sorted(self.unique_atom_types.keys())]))) - inp_str += "run 0\n" - - # Write the final relaxed structure to a file - inp_str += "%-15s %s\n"%("write_data", "data.optimized") - for mol in sorted(self.molecule_types.keys()): rep = self.subgraphs[self.molecule_types[mol][0]] if rep.rigid: From 04bdc95fd04b91929839627778dfd29b3a380d16 Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 13 Jan 2026 15:58:59 +0100 Subject: [PATCH 10/12] Enhance sulfur force field type assignment based on graph degree --- lammps_interface/ForceFields.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/lammps_interface/ForceFields.py b/lammps_interface/ForceFields.py index e49f4d7..518d0ca 100644 --- a/lammps_interface/ForceFields.py +++ b/lammps_interface/ForceFields.py @@ -2499,8 +2499,12 @@ def detect_ff_terms(self): if neigh_elem <= set(["Si", "Al"]): data['force_field_type'] = "O_3_z" elif data['element'] == "S": - # default sp3 hybridized sulphur set to S_3+6 - data['force_field_type'] = "S_3+6" + if self.graph.degree(node) == 4: + data['force_field_type'] = "S_3+6" + elif self.graph.degree(node) == 3: + data['force_field_type'] = "S_3+4" + elif self.graph.degree(node) == 2: + data['force_field_type'] = "S_3+2" elif data['hybridization'] == "aromatic": data['force_field_type'] = "%s_R"%data['element'] @@ -3616,6 +3620,14 @@ def detect_ff_terms(self): data['force_field_type'] = "%s_2"%data['element'] elif data['hybridization'] == "sp": data['force_field_type'] = "%s_1"%data['element'] + + if data['element'] == "S": + if self.graph.degree(node) == 4: + data['force_field_type'] = "S_3+6" + elif self.graph.degree(node) == 3: + data['force_field_type'] = "S_3+4" + elif self.graph.degree(node) == 2: + data['force_field_type'] = "S_3+2" if data['element'] == "O" and self.graph.degree(node) == 2: if neigh_elem <= metals: data['force_field_type'] = "O_2" From 06e86db014373a9ce7132625c1cc38a16c0bc77b Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Tue, 13 Jan 2026 16:23:33 +0100 Subject: [PATCH 11/12] Refactor minimize command parameters to use dynamic evaluation for better precision --- lammps_interface/lammps_main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lammps_interface/lammps_main.py b/lammps_interface/lammps_main.py index 558c0b9..6671379 100755 --- a/lammps_interface/lammps_main.py +++ b/lammps_interface/lammps_main.py @@ -1191,14 +1191,14 @@ def construct_input_file(self): inp_str += " %-15s %s\n"%("min_style", min_style) inp_str += " %-15s %s\n"%("fix","%i all box/relax %s 0.0 vmax 0.01"%(fix, box_min)) - inp_str += " %-15s %s\n"%("minimize","1.0e-8 1.0e-8 5000 10000") + inp_str += " %-15s %s\n"%("minimize","%.2e %.2e 5000 10000"%(min_eval, min_eval)) inp_str += " %-15s %s\n"%("unfix", "%i"%fix) inp_str += "\n # --- Stage 2: Atom Relaxation ---\n" inp_str += " %-15s %s\n"%("min_style", "fire") inp_str += " %-15s %-10s %s\n"%("variable", "tempstp", "equal $(step)") inp_str += " %-15s %-10s %s\n"%("variable", "CellMinStep", "equal ${tempstp}") - inp_str += " %-15s %s\n"%("minimize","1.0e-10 1.0e-10 10000 20000") + inp_str += " %-15s %s\n"%("minimize","%.2e %.2e 10000 20000"%(min_eval, min_eval)) inp_str += "\n # --- Convergence Check (Per Atom) ---\n" inp_str += " %-15s %-10s %s\n"%("variable", "curr_pe", "equal pe") From b463ff2bc05fed0489fcf5c0b62cff79bfb235f1 Mon Sep 17 00:00:00 2001 From: Felipe Lopes Date: Mon, 19 Jan 2026 10:23:42 +0100 Subject: [PATCH 12/12] Add minimal energy change parameter for optimization convergence --- lammps_interface/InputHandler.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lammps_interface/InputHandler.py b/lammps_interface/InputHandler.py index f4211d4..b64e8ba 100644 --- a/lammps_interface/InputHandler.py +++ b/lammps_interface/InputHandler.py @@ -239,6 +239,14 @@ def run_command_line_options(self): help="Max deviation of adjusted variable "+ "at each step is scaled by MAX_DEV/ITER_COUNT. "+ "Default is 0.01 (ideal for volume).") + parameter_group.add_argument("--min_eval", + action="store", + type=float, + default=0.000001, + dest="min_eval", + help="Minimal change in energy per atom to consider" + + "the optimization converged." + + "Default is 1e-6 (ideal for most cases).") parameter_group.add_argument("--temperature", action="store", type=float,