@@ -29,6 +29,45 @@ static inline float ti_hash_unit_f(uint32_t x){
2929 return (float )x * (1 .0f /4294967296 .0f );
3030}
3131
32+ static inline Mat3f ti_rotation_between_dirs ( const Vec3f& from, const Vec3f& to ){
33+ Mat3f rot = Mat3fIdentity;
34+ const float from_n2 = from.norm2 ();
35+ const float to_n2 = to .norm2 ();
36+ if ( (from_n2 < 1 .0e-12f ) || (to_n2 < 1 .0e-12f ) ) return rot;
37+ Vec3f a = from*(1 .0f /sqrtf (from_n2));
38+ Vec3f b = to *(1 .0f /sqrtf (to_n2 ));
39+ float c = a.dot (b);
40+ if (c > 1 .0f ) c = 1 .0f ;
41+ if (c < -1 .0f ) c = -1 .0f ;
42+ if (c > 1 .0f - 1 .0e-6f ) return rot;
43+ Vec3f axis;
44+ if (c < -1 .0f + 1 .0e-6f ){
45+ axis = (fabsf (a.x ) < 0 .9f ) ? Vec3f{1 .0f ,0 .0f ,0 .0f } : Vec3f{0 .0f ,1 .0f ,0 .0f };
46+ axis.makeOrthoU (a);
47+ const float axis_n2 = axis.norm2 ();
48+ if (axis_n2 < 1 .0e-12f ) return rot;
49+ axis.mul (1 .0f /sqrtf (axis_n2));
50+ rot.fromRotation (3 .14159265358979323846f , axis);
51+ return rot;
52+ }
53+ axis.set_cross (a,b);
54+ const float s2 = axis.norm2 ();
55+ if (s2 < 1 .0e-12f ) return rot;
56+ axis.mul (1 .0f /sqrtf (s2));
57+ rot.fromRotation (atan2f (sqrtf (s2), c), axis);
58+ return rot;
59+ }
60+
61+ static inline Vec3f ti_precondition_atom_position ( const Vec3f& p, const Vec3f& mid0, const Vec3f& e0 , const Mat3f& rot, const Vec3f& mid1, const Vec3f& e1 , float stretch ){
62+ Vec3f q = p - mid0;
63+ const float q_par = q.dot (e0 );
64+ q.add_mul ( e0 , -q_par );
65+ Vec3f q_out = rot.dot (q);
66+ q_out.add_mul ( e1 , q_par * stretch );
67+ q_out.add (mid1);
68+ return q_out;
69+ }
70+
3271
3372
3473struct FIRE_setup {
@@ -128,7 +167,7 @@ class MolWorld_sp3_multi : public MolWorld_sp3, public MultiSolverInterface { pu
128167 bool bGPU_MMFF = true ;
129168
130169 bool initial = false ;
131- bool bHardConstrainedAtoms = true ;
170+ bool bHardConstrainedAtoms = false ;
132171 bool bSoftConstrainedAtoms = false ;
133172 bool bHardConstrainedDistance = false ;
134173 bool bSoftConstrainedDistance = false ;
@@ -137,6 +176,7 @@ class MolWorld_sp3_multi : public MolWorld_sp3, public MultiSolverInterface { pu
137176 Quat4f* aforces =0 ;
138177 Quat4f* avel =0 ;
139178 Quat4f* cvfs =0 ;
179+ Quat4f* fprev =0 ;
140180
141181 FIRE* fire =0 ; // FIRE-relaxation state
142182 Quat4f* MDpars =0 ; // Molecular dynamics params
@@ -216,19 +256,11 @@ class MolWorld_sp3_multi : public MolWorld_sp3, public MultiSolverInterface { pu
216256
217257 const char * uploadPopName=0 ;
218258
219- int iterPerFrame = 1000 ;
220-
221- float * gpu_work = 0 ;
222-
223- bool bSaveTrajectory = false ;
224-
225- char traj_fname[256 ] = " trajectory.xyz" ;
226-
227-
228-
229-
230-
231- bool bMILAN = false ;
259+ int iterPerFrame = 1000 ;
260+ float * gpu_work = 0 ;
261+ bool bSaveTrajectory = false ;
262+ char traj_fname[256 ] = " trajectory.xyz" ;
263+ bool bMILAN = false ;
232264
233265
234266 bool bSaveToDatabase=false ;
@@ -273,24 +305,29 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
273305
274306 void resetTIBatchState (bool bUploadToGPU=true ){
275307 ti_step = 0 ;
276- const Quat4f tdrive0{0 .0f ,-1 .0f ,0 .0f ,0 .0f };
308+ const Quat4f tdrive0 = bDeterministicTDrive
309+ ? Quat4f{ (float )go.T_target , (float )go.gamma_damp , 0 .0f , 0 .0f }
310+ : Quat4f{ 0 .0f , -1 .0f , 0 .0f , 0 .0f };
277311 for (int isys=0 ; isys<nSystems; isys++){
278312 const int i0v = isys * ocl.nvecs ;
279313 pack_system (isys, ffl0, false , false , false , true );
280314 for (int i=0 ; i<ocl.nvecs ; i++){
281315 aforces[i0v+i] = Quat4fZero;
282316 avel [i0v+i] = Quat4fZero;
283317 cvfs [i0v+i] = Quat4fZero;
318+ fprev [i0v+i] = Quat4fZero;
284319 }
285320 averageForces[isys] = Quat4fZero;
286321 TDrive[isys] = tdrive0;
287322 fire[isys].bind_params ( &fire_setup );
288323 fire[isys].id = isys;
289- MDpars[isys] = Quat4f{ fire[isys].dt , 1 .0f - fire[isys].damping , 1 .0f , 0 .0f };
324+ MDpars[isys] = bDeterministicTDrive
325+ ? Quat4f{ fire[isys].par ->dt_max , 1 .0f , 1 .0f , 0 .0f }
326+ : Quat4f{ fire[isys].dt , 1 .0f - fire[isys].damping , 1 .0f , 0 .0f };
290327 if (gopts){
291328 gopts[isys].copy (go);
292329 gopts[isys].istep = 0 ;
293- gopts[isys].bExploring = bGopt;
330+ gopts[isys].bExploring = bDeterministicTDrive ? true : bGopt;
294331 }
295332 if (isSystemRelaxed){ isSystemRelaxed[isys] = false ; }
296333 }
@@ -300,6 +337,7 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
300337 err |= ocl.upload ( ocl.ibuff_aforces , aforces );
301338 err |= ocl.upload ( ocl.ibuff_avel , avel );
302339 err |= ocl.upload ( ocl.ibuff_cvf , cvfs );
340+ err |= ocl.upload ( ocl.ibuff_fprev , fprev );
303341 err |= ocl.upload ( ocl.ibuff_constr , constr );
304342 err |= ocl.upload ( ocl.ibuff_constrK , constrK );
305343 err |= ocl.upload ( ocl.ibuff_lvecs , lvecs );
@@ -321,7 +359,7 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
321359 }
322360
323361 int iSi1 = -1 ;
324- int si_count = 0 ;
362+ int si_count = 0 ; // counts pairs of Si atoms
325363 for (int ia=0 ; ia<ffls[isys].natoms ; ia++){
326364 if (ffls[isys].atypes [ia]==params.getAtomType (" Si" )){
327365 if (iSi1 == -1 ){ iSi1 = ia; }
@@ -349,16 +387,27 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
349387 if ( bSoftConstrainedAtoms ){
350388 aconK1.w = K;
351389 aconK2.w = K;
352- } else {
353- Vec3f dir = p1_1 - p1_0; dir.normalize ();
354- aconK1.f = dir; aconK2.f = dir;
355390 }
356- } else if ( bHardConstrainedDistance || bSoftConstrainedDistance ){
391+ aconK1.f .set_sub (p1_1, p1_0);
392+ aconK2.f .set_sub (p2_1, p2_0);
393+ }
394+ else if ( bHardConstrainedDistance || bSoftConstrainedDistance ){
357395 float type_offset = bHardConstrainedDistance ? 4 .0e6f : 6 .0e6f;
358396 acon1.w = 1e6f + type_offset;
359397 acon2.w = 2e6f + type_offset;
360398 acon1.x = (float )iSi2; acon1.y = L_target;
361399 acon2.x = (float )iSi1; acon2.y = L_target;
400+ Vec3f dT1, dT2, dD, h;
401+ dT1.set_sub (p1_1, p1_0);
402+ dT2.set_sub (p2_1, p2_0);
403+ dD.set_sub (dT1, dT2);
404+ h.set_sub (T1, T2);
405+ const float invL = (L_target > 1e-8f ) ? (1 .0f / L_target) : 0 .0f ;
406+ const float dLdl = h.dot (dD) * invL;
407+ // Each atom contributes the same radial projection; store half on each side
408+ // so the host-side sum reconstructs dU/dlambda only once per constrained pair.
409+ aconK1.x = 0 .5f * dLdl;
410+ aconK2.x = 0 .5f * dLdl;
362411 if ( bSoftConstrainedDistance ){
363412 aconK1.w = K;
364413 aconK2.w = K;
@@ -371,6 +420,33 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
371420
372421 if ( bAlign ){
373422 int i0v = isys * ocl.nvecs ;
423+ // Precondition the whole system from the reference endpoint geometry to the
424+ // current TI window. This reduces large nonequilibrium transients caused by
425+ // moving only the constrained atoms while leaving the rest of the chain at
426+ // the lambda=0 geometry.
427+ if ( (go.T_target > 1.0e-6 ) && (si_count == 0 ) ){
428+ const Vec3f P1_ref = atoms[i0v + iSi1].f ;
429+ const Vec3f P2_ref = atoms[i0v + iSi2].f ;
430+ Vec3f u0 = P1_ref - P2_ref;
431+ Vec3f u1 = T1 - T2;
432+ const float L0 = u0.norm ();
433+ const float L1 = u1.norm ();
434+ if ( (L0 > 1 .0e-6f ) && (L1 > 1 .0e-6f ) ){
435+ const Vec3f mid0 = (P1_ref + P2_ref)*0 .5f ;
436+ const Vec3f mid1 = (T1 + T2)*0 .5f ;
437+ const Vec3f e0 = u0*(1 .0f /L0);
438+ const Vec3f e1 = u1*(1 .0f /L1);
439+ const float stretch = L1/L0;
440+ const Mat3f rot = ti_rotation_between_dirs ( u0, u1 );
441+ for (int i=0 ; i<ocl.nvecs ; i++){
442+ if (i < ocl.nAtoms ){
443+ atoms[i0v + i].f = ti_precondition_atom_position ( atoms[i0v + i].f , mid0, e0 , rot, mid1, e1 , stretch );
444+ }else {
445+ atoms[i0v + i].f = rot.dot ( atoms[i0v + i].f );
446+ }
447+ }
448+ }
449+ }
374450 atoms[i0v + iSi1] = (Quat4f){T1.x , T1.y , T1.z , 0 .0f };
375451 atoms[i0v + iSi2] = (Quat4f){T2.x , T2.y , T2.z , 0 .0f };
376452 }
@@ -392,6 +468,21 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
392468 _setbool ( bSoftConstrainedDistance, softDist );
393469 #undef _setbool
394470
471+ int nConstraintModes =
472+ (int )bHardConstrainedAtoms +
473+ (int )bSoftConstrainedAtoms +
474+ (int )bHardConstrainedDistance +
475+ (int )bSoftConstrainedDistance;
476+ if (nConstraintModes == 0 ){
477+ bHardConstrainedAtoms = true ;
478+ nConstraintModes = 1 ;
479+ printf (" WARNING: computeFreeEnergy() no TI constraint mode selected; defaulting to hard atom constraints.\n " );
480+ }
481+ if (nConstraintModes > 1 ){
482+ printf (" ERROR: computeFreeEnergy() expects exactly one TI constraint mode, got %d.\n " , nConstraintModes);
483+ return NAN;
484+ }
485+
395486 // mode: 0=TI, 1=JE, 2=Both
396487 bool doTI = (mode == 0 ) || (mode == 2 );
397488 bool doJE = (mode == 1 ) || (mode == 2 );
@@ -475,6 +566,7 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
475566 float lambda = (float )il / (float )(nLambda - 1 );
476567 setupTIConstraints ( isys, lambda, nCVs, initial_positions, final_positions, true , K );
477568 }
569+ ocl.upload ( ocl.ibuff_TDrive , TDrive );
478570 ocl.upload ( ocl.ibuff_constr , constr );
479571 ocl.upload ( ocl.ibuff_constrK , constrK );
480572 ocl.upload ( ocl.ibuff_atoms , atoms ); // Upload potentially aligned atoms
@@ -770,6 +862,7 @@ void realloc( int nSystems_ ){
770862 _realloc0 ( aforces, ocl.nvecs *nSystems , Quat4fZero );
771863 _realloc0 ( avel, ocl.nvecs *nSystems , Quat4fZero );
772864 _realloc0 ( cvfs, ocl.nvecs *nSystems , Quat4fZero );
865+ _realloc0 ( fprev, ocl.nvecs *nSystems , Quat4fZero );
773866 _realloc0 ( constr, ocl.nAtoms *nSystems , Quat4fOnes*-1 . );
774867 _realloc0 ( constrK, ocl.nAtoms *nSystems , Quat4fOnes*-1 . );
775868
@@ -1407,9 +1500,19 @@ double evalVFs( double Fconv=1e-6 ){
14071500 nbEvaluation+=nPerVFs;
14081501 int i0v = isys * ocl.nvecs ;
14091502 // evalVF( ocl.nvecs, aforces+i0v, avel +i0v, fire[isys], MDpars[isys] );
1410- evalVF_new ( ocl.nvecs , cvfs+i0v, fire[isys], MDpars[isys], gopts[isys].bExploring );
1503+ const bool bSamplingTI = bDeterministicTDrive;
1504+ evalVF_new ( ocl.nvecs , cvfs+i0v, fire[isys], MDpars[isys], bSamplingTI || gopts[isys].bExploring );
14111505 double f2 = fire[isys].ff ;
14121506 if (f2>F2max){ F2max=f2; iSysFMax=isys; }
1507+ if (bSamplingTI){
1508+ TDrive[isys].x = go.T_target ;
1509+ TDrive[isys].y = go.gamma_damp ;
1510+ uint32_t lambda_id = (TDrive[isys].z >= 0 .0f ) ? (uint32_t )(TDrive[isys].z + 0 .5f ) : (uint32_t )isys;
1511+ const uint32_t step_id = (uint32_t )ti_step;
1512+ const uint32_t seed = ti_hash_u32 (lambda_id * 0x9E3779B9u ^ step_id);
1513+ TDrive[isys].w = ti_hash_unit_f (seed) * 2 .0f - 1 .0f ;
1514+ continue ;
1515+ }
14131516 // -------- Global Optimization
14141517 if ( ( f2 < F2conv ) && (!gopts[isys].bExploring ) ){
14151518 int i0v = isys * ocl.nvecs ;
@@ -2318,7 +2421,7 @@ int run_ocl_opt( int niter, double Fconv=1e-6 ){
23182421 ocl.upload ( ocl.ibuff_gforces , gforces );
23192422 }
23202423
2321- if (bGopt){
2424+ if (bGopt && !bDeterministicTDrive ){
23222425 // printf("MolWorld_sp3_multi::run_ocl_opt() bGopt=%i bGroups=%i \n", bGopt, bGroups );
23232426 bExplore = false ;
23242427 for (int isys = 0 ; isys < nSystems; isys++){
0 commit comments