From 97f040a468d7ea6a26622a1d7f3cc47829ea6f99 Mon Sep 17 00:00:00 2001 From: Jack Lawrence Date: Mon, 27 Jan 2025 11:56:31 -0500 Subject: [PATCH] Updated scripts, small changes to applied_efp.rst --- doc/applied_efp.rst | 9 +- examples/flex-EFP/Scripts/cut_qm.py | 15 +- examples/flex-EFP/Scripts/fragment_RMSD.py | 230 +++------------------ examples/flex-EFP/Scripts/make_AAs.py | 24 ++- examples/flex-EFP/Scripts/make_mm.py | 1 + 5 files changed, 52 insertions(+), 227 deletions(-) diff --git a/doc/applied_efp.rst b/doc/applied_efp.rst index 7dae6bb2..8eb8417a 100644 --- a/doc/applied_efp.rst +++ b/doc/applied_efp.rst @@ -7,11 +7,14 @@ FMO: Applied flexible EFP Overview -------- -As is the case with many photoactive proteins,computational methods struggle to reproduce -experimental spectra for the Fenna-Matthews-Olson complex (FMO). Work by +Computational methods struggle to reproduce experimental spectra in photoactive proteins. +One such protein is the the Fenna-Matthews-Olson complex (FMO). Work by `Kim et al `_ shows that flexible QM/EFP can be applied to FMO to correctly generate computational results in -quantitative agreement to experimental spectra. +quantitative agreement to experimental spectra. In short, the solvatochromic environments +surrounding each bacteriochloropyll a (BChl a) pigment are heavily polarizable; EFP captures +this polarizability explicitly where a standard QM/MM calculation will onyl approximate it, +thus the polarizable environment is crucial to accurately model photosynthesis. The key to applying EFP to your system is to carefully define the active site and EFP region. FMO is a trimeric protein with eight bacteriochloropyll a (BChl) pigments in each monomer. diff --git a/examples/flex-EFP/Scripts/cut_qm.py b/examples/flex-EFP/Scripts/cut_qm.py index 446c36bc..6c7ccb51 100644 --- a/examples/flex-EFP/Scripts/cut_qm.py +++ b/examples/flex-EFP/Scripts/cut_qm.py @@ -243,20 +243,21 @@ def get_screen(lines,coords,title): if title in line: start=1 -def get_header(lines): +def get_header(lines,input_name): header=[] + fragname=input_name.split('.')[0] for line in lines: - header.append(line) + header.append(line.replace('$FRAGNAME',fragname)) #print(line) if 'COORDINATES (BOHR)' in line: return header def main(inp, efp): - with open(inp,'r') as inp: - inp_lines=inp.readlines() - with open(efp,'r') as efp: - efp_lines=efp.readlines() - header=get_header(efp_lines) + with open(inp,'r') as inp_: + inp_lines=inp_.readlines() + with open(efp,'r') as efp_: + efp_lines=efp_.readlines() + header=get_header(efp_lines,inp) rem_atoms, rem_pols = get_specials(inp_lines) keep_coords,rem_coords=get_coords(efp_lines,rem_atoms,rem_pols) keep_monop=get_monopoles(efp_lines,keep_coords) diff --git a/examples/flex-EFP/Scripts/fragment_RMSD.py b/examples/flex-EFP/Scripts/fragment_RMSD.py index b5322f8b..95b2fa3b 100644 --- a/examples/flex-EFP/Scripts/fragment_RMSD.py +++ b/examples/flex-EFP/Scripts/fragment_RMSD.py @@ -12,74 +12,14 @@ ang_cutoff=0.20 # Minimum RMSD allowed for "good" match cutoff=ang_cutoff*1.8897259886 # Cutoff convert to Bohr -inp=sys.argv[1] # a_22_304.inp for example. Input file with fragment coords +inp=sys.argv[1] +#inp='g_952.inp' # a_22_304.inp for example. Input file with fragment coords with open(inp, "r") as orig: orgs = orig.readlines() outname=inp.replace('.inp','.efp') # Output file name, same as input but changed extension. # If no match is found, then outname will not be used. -def rotate_vector(vector, rotation_matrix): - #Different from coords because dipoles assumed centered on coordinate. NOT placed in Cartesian space - return np.dot(vector,rotation_matrix) - -def rotate_quadrupole(quadrupole, rotation_matrix): - #R x Q x R_t - #Quadrupoles are 3x3s. But .efp give only six values, order is assumed xx, xy, xz, yy, yz, zz - #With symmetric off-diagonal elements. - q = np.array([ - [quadrupole[0], quadrupole[3], quadrupole[4]], - [quadrupole[3], quadrupole[1], quadrupole[5]], - [quadrupole[4], quadrupole[5], quadrupole[2]], - ]) - q_rotated = rotation_matrix.T @ q @ rotation_matrix - return [q_rotated[0, 0], q_rotated[1, 1], q_rotated[2, 2], - q_rotated[0, 1], q_rotated[0, 2], q_rotated[1, 2]] - -def rotate_octupole(octupole, rotation_matrix): - #Oxyz=Rxi*Ryj*Rzk*Oijk summed over i=x,y,z;j=x,y,z;z=x,y,z; - #Each term Oxyz of new tensor is composed of - #27 pieces generated from old tensor and rotation matrix! - #The indexes are hard-coded for some better readiability - o = np.array([ - [[octupole[0], octupole[3], octupole[4]], [octupole[3], octupole[5], octupole[9]], [octupole[4], octupole[9], octupole[7]]], - [[octupole[3], octupole[5], octupole[9]], [octupole[5], octupole[1], octupole[6]], [octupole[9], octupole[6], octupole[8]]], - [[octupole[4], octupole[9], octupole[7]], [octupole[9], octupole[6], octupole[8]], [octupole[7], octupole[8], octupole[2]]], - ]) - #fancy function below does what we want. Look up np.einsum if you need clarification. - #o_rotated = np.einsum('ij,jkl,lm->ikm', rotation_matrix, o, rotation_matrix.T) - o_rotated = np.zeros((3, 3, 3)) - for p in range(0,3): - for q in range(0,3): - for r in range(0,3): - o_rotated[p][q][r]=single_term_oct(p,q,r,o,rotation_matrix.T) - return [ - o_rotated[0, 0, 0], o_rotated[1, 1, 1], o_rotated[2, 2, 2], - o_rotated[0, 0, 1], o_rotated[0, 0, 2], o_rotated[0, 1, 1], - o_rotated[1, 1, 2], o_rotated[0, 2, 2], o_rotated[1, 2, 2], - o_rotated[0, 1, 2] - ] - -def single_term_oct(p,q,r,octupole,rot_mat): - term=0.0 - for i in range(0,3): - for j in range(0,3): - for k in range(0,3): - term+=rot_mat[p][i]*rot_mat[q][j]*rot_mat[r][k]*octupole[i][j][k] - return term - -def rotate_polarizability(polarizability, rotation_matrix): - #RxQxR_t - q = np.array([ - [polarizability[0], polarizability[3], polarizability[4]], - [polarizability[6], polarizability[1], polarizability[5]], - [polarizability[7], polarizability[8], polarizability[2]]]) - q_rotated = rotation_matrix.T @ q @ rotation_matrix - return [q_rotated[0, 0],q_rotated[1, 1],q_rotated[2, 2], - q_rotated[0, 1],q_rotated[0, 2],q_rotated[1, 2], - q_rotated[1, 0],q_rotated[2, 0],q_rotated[2, 1]] - #return q_rotated.flatten() - def get_RMSD(coords1,coords2,mass): tot=0.0 length=len(coords1) @@ -106,8 +46,8 @@ def kabsch_algorithm(coords, coords2): u, _, vh = np.linalg.svd(covariance_matrix) rotation_matrix = np.dot(u, vh) - #if np.linalg.det(rotation_matrix) < 0: - # rotation_matrix[:, -1] *= -1 + if np.linalg.det(rotation_matrix) < 0: + rotation_matrix[:, -1] *= -1 return rotation_matrix, com1, com2 def apply_transform(coords, rotation_matrix, com1, com2): @@ -118,28 +58,34 @@ def apply_transform(coords, rotation_matrix, com1, com2): return rotated_coords amino_acid_dict = {'a':'ala','r':'arg','n':'asn','d':'asp','c':'cys', - 'q':'gln','e':'glu','g':'gly','h':'his','i':'ile', + 'q':'gln','e':'glu','g':'gly','h':'hip','i':'ile', 'l':'leu','k':'lys','m':'met','f':'phe','p':'pro', 's':'ser','t':'thr','w':'trp','y':'tyr','v':'val', 'hp':'hip','hd':'hid','he':'hie'} atom_weights = {'H':1.0,'O':15.9949100,'C':12.0000000,'N':12.0030700,'S':32.0650000,'M':23.3040000} try: - res=amino_acid_dict[inp[0]] + res=amino_acid_dict[inp.split('_')[0]] except: print('No library folder for: '+inp) -#directory = '/depot/lslipche/data/yb_boss/flexible_efp/efpdb/'+res + exit() + directory+=res + file_extension = '.efp' # change this to your desired file extension + +#Grab coordinates from the input file, convert from Angstroms to bohrs. start=0 xyz=[] orig_coords=[] weights=[] +inp_at_lines=[] for line in orgs: if '$end' in line: start=0 elif(start==1): + inp_at_lines.append(line) xyz.append(float(line.split()[2])/0.529177249) xyz.append(float(line.split()[3])/0.529177249) xyz.append(float(line.split()[4])/0.529177249) @@ -148,18 +94,16 @@ def apply_transform(coords, rotation_matrix, com1, com2): weights.append(atom_weights[line.split()[0][0]]) elif 'C1' in line: start=1 - -minrmsd=20.0 +minrmsd=20.0 # Arbitrary starting value. This will be replaced with real RMSD values in the loop. # Loop through files in the directory for filename in os.listdir(directory): if filename.endswith(file_extension): full_path = os.path.join(directory, filename) - #full_path=directory+'/gly5817.efp' - #print(f"Processing file: {full_path}") with open(full_path, "r") as term: terms = term.readlines() - + else: + continue start=0 xyz=[] term_coords=[] @@ -167,8 +111,6 @@ def apply_transform(coords, rotation_matrix, com1, com2): if 'BO21' in line: start=0 elif(start==1): - #print(line.split()[1]) - #print(float(line.split()[1])*0.529177249) xyz.append(float(line.split()[1])) xyz.append(float(line.split()[2])) xyz.append(float(line.split()[3])) @@ -181,139 +123,13 @@ def apply_transform(coords, rotation_matrix, com1, com2): rotation_matrix, com1, com2 = kabsch_algorithm(new_coords, coords2) aligned_new_coords = apply_transform(new_coords, rotation_matrix, com1,com2) rmsd=(get_RMSD(aligned_new_coords,coords2,weights)) - #print(rmsd) - if(rmsd' in line: - name=line.split()[0] - while(len(name)<7): - name=name+' ' - quadrupole=[float(line.split()[1]),float(line.split()[2]),float(line.split()[3]),float(line.split()[4])] - else: - quadrupole.append(float(line.split()[0])) - quadrupole.append(float(line.split()[1])) - rotated_quad=rotate_quadrupole(quadrupole, rotation_matrix) - temp.append(name+' '+f"{rotated_quad[0]:14.10f}"+' '+f"{rotated_quad[1]:14.10f}"+' ' - +f"{rotated_quad[2]:14.10f}"+' '+f"{rotated_quad[3]:14.10f}"+' >\n'+ - ' '+f"{rotated_quad[4]:14.10f}"+' '+f"{rotated_quad[5]:14.10f}"+'\n') - elif(start==4): - newline='' - if(j%3==0): - name=line.split()[0] - while(len(name)<7): - name=name+' ' - #print(line) - octupole=[float(line.split()[1]),float(line.split()[2]),float(line.split()[3]),float(line.split()[4])] - j+=1 - elif(j%3==1): - for i in range(4): - octupole.append(float(line.split()[i])) - j+=1 - else: - octupole.append(float(line.split()[0])) - octupole.append(float(line.split()[1])) - rotated_oct=rotate_octupole(octupole, rotation_matrix) - #print(rotated_oct) - temp.append(name+' '+f"{rotated_oct[0]:13.9f}"+' '+f"{rotated_oct[1]:13.9f}"+' ' - +f"{rotated_oct[2]:13.9f}"+' '+f"{rotated_oct[3]:13.9f}"+' >\n'+ - ' '+f"{rotated_oct[4]:13.9f}"+' '+f"{rotated_oct[5]:13.9f}"' ' - +f"{rotated_oct[6]:13.9f}"+' '+f"{rotated_oct[7]:13.9f}"+' >\n'+ - ' '+f"{rotated_oct[8]:13.9f}"+' '+f"{rotated_oct[9]:13.9f}"+'\n') - j=0 - elif(start==5): - if 'CT' in line: - newline=line.split()[0] - while(len(newline)<4): - newline=newline+' ' - coords=(apply_transform([float(line.split()[1]),float(line.split()[2]),float(line.split()[3])],rotation_matrix,com1,com2)) - for i in range(len(coords)): - newline+=f"{coords[i]:16.10f}" - newline+='\n' - temp.append(newline) - elif(j%3==0): - polar=[float(line.split()[0]),float(line.split()[1]),float(line.split()[2]),float(line.split()[3])] - j+=1 - elif(j%3==1): - for i in range(4): - polar.append(float(line.split()[i])) - j+=1 - else: - polar.append(float(line.split()[0])) - rotated_polar=rotate_polarizability(polar,rotation_matrix) - temp.append(' '+f"{rotated_polar[0]:14.10f}"+' '+f"{rotated_polar[1]:14.10f}"+' '+ - f"{rotated_polar[2]:14.10f}"+' '+f"{rotated_polar[3]:14.10f}"+' >\n'+ - ' '+f"{rotated_polar[4]:14.10f}"+' '+f"{rotated_polar[5]:14.10f}"+' '+ - f"{rotated_polar[6]:14.10f}"+' '+f"{rotated_polar[7]:14.10f}"+' >\n'+ - f"{rotated_polar[8]:16.10f}"+'\n') - j=0 - elif 'COORDINATES (BOHR)' in line: - start=1 - elif ' DIPOLES' in line: - start=2 - elif ' QUADRUPOLES' in line: - start=3 - temp.append(line) - elif ' OCTUPOLES' in line: - temp.append(line) - start=4 - j=0 - elif ' POLARIZABLE POINTS' in line: - start=5 - temp.append(line) - j=0 - if(start<3): - temp.append(newline) - - outfile=inp.split('.')[0]+'.efp' - with open(outfile,'w') as outfile: - for line1 in temp: - outfile.write(line1) +if(minrmsd