Skip to content

Commit b72a1b6

Browse files
author
Milan Koci
committed
Added hard/soft constraints, bug with multipliing persists (probably temperature dependant)
1 parent a653623 commit b72a1b6

8 files changed

Lines changed: 384 additions & 50 deletions

File tree

cpp/common/OpenCL/OCL_MM.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class OCL_MM: public OCLsystem { public:
7676

7777
// OpenCL buffers and textures ids
7878
int ibuff_atoms=-1,ibuff_aforces=-1,ibuff_neighs=-1,ibuff_neighCell=-1;
79-
int ibuff_avel=-1,ibuff_cvf=-1, ibuff_neighForce=-1, ibuff_bkNeighs=-1, ibuff_bkNeighs_new=-1;
79+
int ibuff_avel=-1,ibuff_cvf=-1, ibuff_fprev=-1, ibuff_neighForce=-1, ibuff_bkNeighs=-1, ibuff_bkNeighs_new=-1;
8080
int ibuff_REQs=-1, ibuff_MMpars=-1, ibuff_BLs=-1,ibuff_BKs=-1,ibuff_Ksp=-1, ibuff_Kpp=-1; // MMFFf4 params
8181
int ibuff_lvecs=-1, ibuff_ilvecs=-1,ibuff_MDpars=-1,ibuff_TDrive=-1, ibuff_pbcshifts=-1, ibuff_jeParams=-1;
8282
int ibuff_constr=-1;
@@ -225,6 +225,7 @@ class OCL_MM: public OCLsystem { public:
225225
ibuff_bkNeighs_new = newBuffer( "bkNeighs_new", nSystems*nvecs, sizeof(int4 ), 0, CL_MEM_READ_ONLY );
226226
ibuff_avel = newBuffer( "avel", nSystems*nvecs, sizeof(float4), 0, CL_MEM_READ_WRITE ); // atoms velocities (x,y,z,m)
227227
ibuff_cvf = newBuffer( "cvf", nSystems*nvecs , sizeof(float4), 0, CL_MEM_READ_WRITE );
228+
ibuff_fprev = newBuffer( "fprev", nSystems*nvecs , sizeof(float4), 0, CL_MEM_READ_WRITE );
228229
ibuff_neighForce = newBuffer( "neighForce", nSystems*nbkng, sizeof(float4), 0, CL_MEM_READ_WRITE );
229230

230231
ibuff_MMpars = newBuffer( "MMpars", nSystems*nnode, sizeof(int4), 0, CL_MEM_READ_ONLY );
@@ -608,6 +609,7 @@ class OCL_MM: public OCLsystem { public:
608609
err |= useArgBuff( ibuff_averageForces);// 15
609610
err |= useArgBuff( ibuff_work ); // 16
610611
err |= useArgBuff( ibuff_jeParams ); // 17
612+
err |= useArgBuff( ibuff_fprev ); // 18
611613
OCL_checkError(err, "setup_updateAtomsMMFFf4");
612614
return task;
613615
// const int4 n, // 1 // (natoms,nnode) dimensions of the system

cpp/common/molecular/MolWorld_sp3_multi.h

Lines changed: 127 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,45 @@ static inline float ti_hash_unit_f(uint32_t x){
2929
return (float)x * (1.0f/4294967296.0f);
3030
}
3131

32+
static inline Mat3f ti_rotation_between_dirs( const Vec3f& from, const Vec3f& to ){
33+
Mat3f rot = Mat3fIdentity;
34+
const float from_n2 = from.norm2();
35+
const float to_n2 = to .norm2();
36+
if( (from_n2 < 1.0e-12f) || (to_n2 < 1.0e-12f) ) return rot;
37+
Vec3f a = from*(1.0f/sqrtf(from_n2));
38+
Vec3f b = to *(1.0f/sqrtf(to_n2 ));
39+
float c = a.dot(b);
40+
if(c > 1.0f) c = 1.0f;
41+
if(c < -1.0f) c = -1.0f;
42+
if(c > 1.0f - 1.0e-6f) return rot;
43+
Vec3f axis;
44+
if(c < -1.0f + 1.0e-6f){
45+
axis = (fabsf(a.x) < 0.9f) ? Vec3f{1.0f,0.0f,0.0f} : Vec3f{0.0f,1.0f,0.0f};
46+
axis.makeOrthoU(a);
47+
const float axis_n2 = axis.norm2();
48+
if(axis_n2 < 1.0e-12f) return rot;
49+
axis.mul(1.0f/sqrtf(axis_n2));
50+
rot.fromRotation(3.14159265358979323846f, axis);
51+
return rot;
52+
}
53+
axis.set_cross(a,b);
54+
const float s2 = axis.norm2();
55+
if(s2 < 1.0e-12f) return rot;
56+
axis.mul(1.0f/sqrtf(s2));
57+
rot.fromRotation(atan2f(sqrtf(s2), c), axis);
58+
return rot;
59+
}
60+
61+
static inline Vec3f ti_precondition_atom_position( const Vec3f& p, const Vec3f& mid0, const Vec3f& e0, const Mat3f& rot, const Vec3f& mid1, const Vec3f& e1, float stretch ){
62+
Vec3f q = p - mid0;
63+
const float q_par = q.dot(e0);
64+
q.add_mul( e0, -q_par );
65+
Vec3f q_out = rot.dot(q);
66+
q_out.add_mul( e1, q_par * stretch );
67+
q_out.add(mid1);
68+
return q_out;
69+
}
70+
3271

3372

3473
struct FIRE_setup{
@@ -128,7 +167,7 @@ class MolWorld_sp3_multi : public MolWorld_sp3, public MultiSolverInterface { pu
128167
bool bGPU_MMFF = true;
129168

130169
bool initial = false;
131-
bool bHardConstrainedAtoms = true;
170+
bool bHardConstrainedAtoms = false;
132171
bool bSoftConstrainedAtoms = false;
133172
bool bHardConstrainedDistance = false;
134173
bool bSoftConstrainedDistance = false;
@@ -137,6 +176,7 @@ class MolWorld_sp3_multi : public MolWorld_sp3, public MultiSolverInterface { pu
137176
Quat4f* aforces =0;
138177
Quat4f* avel =0;
139178
Quat4f* cvfs =0;
179+
Quat4f* fprev =0;
140180

141181
FIRE* fire =0; // FIRE-relaxation state
142182
Quat4f* MDpars =0; // Molecular dynamics params
@@ -216,19 +256,11 @@ class MolWorld_sp3_multi : public MolWorld_sp3, public MultiSolverInterface { pu
216256

217257
const char* uploadPopName=0;
218258

219-
int iterPerFrame = 1000;
220-
221-
float* gpu_work = 0;
222-
223-
bool bSaveTrajectory = false;
224-
225-
char traj_fname[256] = "trajectory.xyz";
226-
227-
228-
229-
230-
231-
bool bMILAN = false;
259+
int iterPerFrame = 1000;
260+
float* gpu_work = 0;
261+
bool bSaveTrajectory = false;
262+
char traj_fname[256] = "trajectory.xyz";
263+
bool bMILAN = false;
232264

233265

234266
bool bSaveToDatabase=false;
@@ -273,24 +305,29 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
273305

274306
void resetTIBatchState(bool bUploadToGPU=true){
275307
ti_step = 0;
276-
const Quat4f tdrive0{0.0f,-1.0f,0.0f,0.0f};
308+
const Quat4f tdrive0 = bDeterministicTDrive
309+
? Quat4f{ (float)go.T_target, (float)go.gamma_damp, 0.0f, 0.0f }
310+
: Quat4f{ 0.0f, -1.0f, 0.0f, 0.0f };
277311
for(int isys=0; isys<nSystems; isys++){
278312
const int i0v = isys * ocl.nvecs;
279313
pack_system(isys, ffl0, false, false, false, true);
280314
for(int i=0; i<ocl.nvecs; i++){
281315
aforces[i0v+i] = Quat4fZero;
282316
avel [i0v+i] = Quat4fZero;
283317
cvfs [i0v+i] = Quat4fZero;
318+
fprev [i0v+i] = Quat4fZero;
284319
}
285320
averageForces[isys] = Quat4fZero;
286321
TDrive[isys] = tdrive0;
287322
fire[isys].bind_params( &fire_setup );
288323
fire[isys].id = isys;
289-
MDpars[isys] = Quat4f{ fire[isys].dt, 1.0f - fire[isys].damping, 1.0f, 0.0f };
324+
MDpars[isys] = bDeterministicTDrive
325+
? Quat4f{ fire[isys].par->dt_max, 1.0f, 1.0f, 0.0f }
326+
: Quat4f{ fire[isys].dt, 1.0f - fire[isys].damping, 1.0f, 0.0f };
290327
if(gopts){
291328
gopts[isys].copy(go);
292329
gopts[isys].istep = 0;
293-
gopts[isys].bExploring = bGopt;
330+
gopts[isys].bExploring = bDeterministicTDrive ? true : bGopt;
294331
}
295332
if(isSystemRelaxed){ isSystemRelaxed[isys] = false; }
296333
}
@@ -300,6 +337,7 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
300337
err |= ocl.upload( ocl.ibuff_aforces, aforces );
301338
err |= ocl.upload( ocl.ibuff_avel, avel );
302339
err |= ocl.upload( ocl.ibuff_cvf, cvfs );
340+
err |= ocl.upload( ocl.ibuff_fprev, fprev );
303341
err |= ocl.upload( ocl.ibuff_constr, constr );
304342
err |= ocl.upload( ocl.ibuff_constrK, constrK );
305343
err |= ocl.upload( ocl.ibuff_lvecs, lvecs );
@@ -321,7 +359,7 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
321359
}
322360

323361
int iSi1 = -1;
324-
int si_count = 0;
362+
int si_count = 0; // counts pairs of Si atoms
325363
for(int ia=0; ia<ffls[isys].natoms; ia++){
326364
if(ffls[isys].atypes[ia]==params.getAtomType("Si")){
327365
if(iSi1 == -1){ iSi1 = ia; }
@@ -349,16 +387,27 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
349387
if( bSoftConstrainedAtoms ){
350388
aconK1.w = K;
351389
aconK2.w = K;
352-
} else {
353-
Vec3f dir = p1_1 - p1_0; dir.normalize();
354-
aconK1.f = dir; aconK2.f = dir;
355390
}
356-
} else if ( bHardConstrainedDistance || bSoftConstrainedDistance ){
391+
aconK1.f.set_sub(p1_1, p1_0);
392+
aconK2.f.set_sub(p2_1, p2_0);
393+
}
394+
else if ( bHardConstrainedDistance || bSoftConstrainedDistance ){
357395
float type_offset = bHardConstrainedDistance ? 4.0e6f : 6.0e6f;
358396
acon1.w = 1e6f + type_offset;
359397
acon2.w = 2e6f + type_offset;
360398
acon1.x = (float)iSi2; acon1.y = L_target;
361399
acon2.x = (float)iSi1; acon2.y = L_target;
400+
Vec3f dT1, dT2, dD, h;
401+
dT1.set_sub(p1_1, p1_0);
402+
dT2.set_sub(p2_1, p2_0);
403+
dD.set_sub(dT1, dT2);
404+
h.set_sub(T1, T2);
405+
const float invL = (L_target > 1e-8f) ? (1.0f / L_target) : 0.0f;
406+
const float dLdl = h.dot(dD) * invL;
407+
// Each atom contributes the same radial projection; store half on each side
408+
// so the host-side sum reconstructs dU/dlambda only once per constrained pair.
409+
aconK1.x = 0.5f * dLdl;
410+
aconK2.x = 0.5f * dLdl;
362411
if( bSoftConstrainedDistance ){
363412
aconK1.w = K;
364413
aconK2.w = K;
@@ -371,6 +420,33 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
371420

372421
if( bAlign ){
373422
int i0v = isys * ocl.nvecs;
423+
// Precondition the whole system from the reference endpoint geometry to the
424+
// current TI window. This reduces large nonequilibrium transients caused by
425+
// moving only the constrained atoms while leaving the rest of the chain at
426+
// the lambda=0 geometry.
427+
if( (go.T_target > 1.0e-6) && (si_count == 0) ){
428+
const Vec3f P1_ref = atoms[i0v + iSi1].f;
429+
const Vec3f P2_ref = atoms[i0v + iSi2].f;
430+
Vec3f u0 = P1_ref - P2_ref;
431+
Vec3f u1 = T1 - T2;
432+
const float L0 = u0.norm();
433+
const float L1 = u1.norm();
434+
if( (L0 > 1.0e-6f) && (L1 > 1.0e-6f) ){
435+
const Vec3f mid0 = (P1_ref + P2_ref)*0.5f;
436+
const Vec3f mid1 = (T1 + T2)*0.5f;
437+
const Vec3f e0 = u0*(1.0f/L0);
438+
const Vec3f e1 = u1*(1.0f/L1);
439+
const float stretch = L1/L0;
440+
const Mat3f rot = ti_rotation_between_dirs( u0, u1 );
441+
for(int i=0; i<ocl.nvecs; i++){
442+
if(i < ocl.nAtoms){
443+
atoms[i0v + i].f = ti_precondition_atom_position( atoms[i0v + i].f, mid0, e0, rot, mid1, e1, stretch );
444+
}else{
445+
atoms[i0v + i].f = rot.dot( atoms[i0v + i].f );
446+
}
447+
}
448+
}
449+
}
374450
atoms[i0v + iSi1] = (Quat4f){T1.x, T1.y, T1.z, 0.0f};
375451
atoms[i0v + iSi2] = (Quat4f){T2.x, T2.y, T2.z, 0.0f};
376452
}
@@ -392,6 +468,21 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
392468
_setbool( bSoftConstrainedDistance, softDist );
393469
#undef _setbool
394470

471+
int nConstraintModes =
472+
(int)bHardConstrainedAtoms +
473+
(int)bSoftConstrainedAtoms +
474+
(int)bHardConstrainedDistance +
475+
(int)bSoftConstrainedDistance;
476+
if(nConstraintModes == 0){
477+
bHardConstrainedAtoms = true;
478+
nConstraintModes = 1;
479+
printf("WARNING: computeFreeEnergy() no TI constraint mode selected; defaulting to hard atom constraints.\n");
480+
}
481+
if(nConstraintModes > 1){
482+
printf("ERROR: computeFreeEnergy() expects exactly one TI constraint mode, got %d.\n", nConstraintModes);
483+
return NAN;
484+
}
485+
395486
// mode: 0=TI, 1=JE, 2=Both
396487
bool doTI = (mode == 0) || (mode == 2);
397488
bool doJE = (mode == 1) || (mode == 2);
@@ -475,6 +566,7 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
475566
float lambda = (float)il / (float)(nLambda - 1);
476567
setupTIConstraints( isys, lambda, nCVs, initial_positions, final_positions, true, K );
477568
}
569+
ocl.upload( ocl.ibuff_TDrive, TDrive );
478570
ocl.upload( ocl.ibuff_constr, constr );
479571
ocl.upload( ocl.ibuff_constrK, constrK );
480572
ocl.upload( ocl.ibuff_atoms, atoms ); // Upload potentially aligned atoms
@@ -770,6 +862,7 @@ void realloc( int nSystems_ ){
770862
_realloc0( aforces, ocl.nvecs*nSystems , Quat4fZero );
771863
_realloc0( avel, ocl.nvecs*nSystems , Quat4fZero );
772864
_realloc0( cvfs, ocl.nvecs*nSystems , Quat4fZero );
865+
_realloc0( fprev, ocl.nvecs*nSystems , Quat4fZero );
773866
_realloc0( constr, ocl.nAtoms*nSystems , Quat4fOnes*-1. );
774867
_realloc0( constrK, ocl.nAtoms*nSystems , Quat4fOnes*-1. );
775868

@@ -1407,9 +1500,19 @@ double evalVFs( double Fconv=1e-6 ){
14071500
nbEvaluation+=nPerVFs;
14081501
int i0v = isys * ocl.nvecs;
14091502
//evalVF( ocl.nvecs, aforces+i0v, avel +i0v, fire[isys], MDpars[isys] );
1410-
evalVF_new( ocl.nvecs, cvfs+i0v, fire[isys], MDpars[isys], gopts[isys].bExploring );
1503+
const bool bSamplingTI = bDeterministicTDrive;
1504+
evalVF_new( ocl.nvecs, cvfs+i0v, fire[isys], MDpars[isys], bSamplingTI || gopts[isys].bExploring );
14111505
double f2 = fire[isys].ff;
14121506
if(f2>F2max){ F2max=f2; iSysFMax=isys; }
1507+
if(bSamplingTI){
1508+
TDrive[isys].x = go.T_target;
1509+
TDrive[isys].y = go.gamma_damp;
1510+
uint32_t lambda_id = (TDrive[isys].z >= 0.0f) ? (uint32_t)(TDrive[isys].z + 0.5f) : (uint32_t)isys;
1511+
const uint32_t step_id = (uint32_t)ti_step;
1512+
const uint32_t seed = ti_hash_u32(lambda_id * 0x9E3779B9u ^ step_id);
1513+
TDrive[isys].w = ti_hash_unit_f(seed) * 2.0f - 1.0f;
1514+
continue;
1515+
}
14131516
// -------- Global Optimization
14141517
if( ( f2 < F2conv ) && (!gopts[isys].bExploring) ){
14151518
int i0v = isys * ocl.nvecs;
@@ -2318,7 +2421,7 @@ int run_ocl_opt( int niter, double Fconv=1e-6 ){
23182421
ocl.upload( ocl.ibuff_gforces, gforces );
23192422
}
23202423

2321-
if (bGopt){
2424+
if (bGopt && !bDeterministicTDrive){
23222425
//printf("MolWorld_sp3_multi::run_ocl_opt() bGopt=%i bGroups=%i \n", bGopt, bGroups );
23232426
bExplore = false;
23242427
for (int isys = 0; isys < nSystems; isys++){

0 commit comments

Comments
 (0)