@@ -350,6 +350,224 @@ void TI_step(double lambda, double dE, double sigma, double dLambda, int nMDstep
350350 OCL_checkError (err, " resetTIBatchState().upload" );
351351 }
352352
353+ inline void clearSystemGPUConstraints ( int isys ){
354+ const int i0a = isys * ocl.nAtoms ;
355+ for (int ia=0 ; ia<ocl.nAtoms ; ia++){
356+ constr [i0a + ia] = Quat4f{0 .0f , 0 .0f , 0 .0f , -1 .0f };
357+ constrK[i0a + ia] = Quat4fZero;
358+ }
359+ }
360+
361+ void setupTIconstraints_debug ( int isys, double lambda_abs, int nCVs, int * dc ){
362+ clearSystemGPUConstraints ( isys );
363+ const int i0a = isys * ocl.nAtoms ;
364+ const float ltarget = (float )(lambda_abs / (double )nCVs);
365+ for (int icv=0 ; icv<nCVs; icv++){
366+ const DistConstr& C = constrs.bonds [ dc[icv] ];
367+ const int ia = C.ias .a ;
368+ const int ib = C.ias .b ;
369+ if ( (ia<0 ) || (ib<0 ) || (ia>=ocl.nAtoms ) || (ib>=ocl.nAtoms ) ) continue ;
370+ // Mirror the CPU harmonic bond constraint using the existing GPU soft-distance mode.
371+ constr [i0a + ia] = Quat4f{ (float )ib, ltarget, 0 .0f , 7 .0e6f };
372+ constr [i0a + ib] = Quat4f{ (float )ia, ltarget, 0 .0f , 8 .0e6f };
373+ // Soft-distance branch in updateAtomsMMFFf4() multiplies the projected force by cK.x,
374+ // while cK.w carries the restraint stiffness. Keep both populated for TI debug parity.
375+ constrK[i0a + ia] = Quat4f{ 1 .0f , 0 .0f , 0 .0f , (float )C.ks .x };
376+ constrK[i0a + ib] = Quat4f{ 1 .0f , 0 .0f , 0 .0f , (float )C.ks .x };
377+ }
378+ }
379+
380+ int run_ocl_TI_debug ( int nsteps, double dt, double T, double gamma ){
381+ if (task_MMFF==0 ) setup_MMFFf4_ocl ();
382+ int err = 0 ;
383+ for (int istep=0 ; istep<nsteps; istep++){
384+ for (int isys=0 ; isys<nSystems; isys++){
385+ MDpars[isys] = Quat4f{ (float )dt, 1 .0f , 1 .0f , 0 .0f };
386+ // Keep z>=0 so the OpenCL kernel takes the same Langevin velocity-Verlet path
387+ // that is intended for TI sampling, but we avoid the higher-level FIRE/evalVFs logic.
388+ TDrive[isys] = Quat4f{ (float )T, (float )gamma, 0 .0f , randf (-1.0 ,1.0 ) };
389+ }
390+ err |= ocl.upload ( ocl.ibuff_MDpars , MDpars );
391+ err |= ocl.upload ( ocl.ibuff_TDrive , TDrive );
392+ err |= task_cleanF->enque_raw ();
393+ if (bNonBonded){
394+ if (bGridFF){
395+ if (gridFF.mode == GridFFmod::BsplineDouble){
396+ err |= task_NBFF_Grid_Bspline->enque_raw ();
397+ }else {
398+ err |= task_NBFF_Grid->enque_raw ();
399+ }
400+ }else if (bSurfAtoms){
401+ err |= task_NBFF ->enque_raw ();
402+ err |= task_SurfAtoms->enque_raw ();
403+ }else {
404+ err |= task_NBFF->enque_raw ();
405+ }
406+ }
407+ err |= task_MMFF->enque_raw ();
408+ err |= task_move->enque_raw ();
409+ err |= ocl.finishRaw ();
410+ OCL_checkError (err, " run_ocl_TI_debug()" );
411+ err = 0 ;
412+ nloop++;
413+ }
414+ return nsteps;
415+ }
416+
417+ double entropic_spring_TI_gpu_debug (
418+ double lamda1, double lamda2, int n, int * dc,
419+ int nbStep = 100 , int nMDsteps = 100000 , int nEQsteps = 10000 ,
420+ double tdamp = 100.0 , double T = 300 , double dt = 0.05 , double Fconv = 1e-6
421+ ){
422+ printf (" Running entropic_spring_TI_gpu_debug with lamda1=%g, lamda2=%g, n=%d, nbStep=%d, nMDsteps=%d, nEQsteps=%d, tdamp=%g, T=%g, dt=%g, Fconv=%g\n " ,
423+ lamda1, lamda2, n, nbStep, nMDsteps, nEQsteps, tdamp, T, dt, Fconv
424+ );
425+ if (n <= 0 ){
426+ printf (" ERROR: entropic_spring_TI_gpu_debug() requires at least one constraint index.\n " );
427+ return NAN;
428+ }
429+ if (nbStep <= 1 ){
430+ printf (" ERROR: entropic_spring_TI_gpu_debug() requires nbStep > 1.\n " );
431+ return NAN;
432+ }
433+
434+ const bool prev_doAngles = ffl.doAngles ;
435+ const bool prev_doPiPiI = ffl.doPiPiI ;
436+ const bool prev_doPiPiT = ffl.doPiPiT ;
437+ const bool prev_doPiSigma = ffl.doPiSigma ;
438+ const bool prev_doBonds = ffl.doBonds ;
439+ const bool prev_subBondNonBond = ffl.bSubtractBondNonBond ;
440+ const bool prev_bNonBonded = bNonBonded;
441+ const bool prev_bConstrains = bConstrains;
442+ const bool prev_bFreeEnergyCalc = bFreeEnergyCalc;
443+ const bool prev_bDeterministic = bDeterministicTDrive;
444+ const bool prev_bGopt = bGopt;
445+ const double prev_gamma = go.gamma_damp ;
446+ const double prev_T = go.T_target ;
447+ const bool prev_goExploring = go.bExploring ;
448+ std::vector<Vec2d> ls_bak (n);
449+ for (int i=0 ; i<n; i++){ ls_bak[i] = constrs.bonds [dc[i]].ls ; }
450+
451+ ffl.doAngles = false ;
452+ ffl.doPiPiI = false ;
453+ ffl.doPiPiT = false ;
454+ ffl.doPiSigma = false ;
455+ ffl.doBonds = true ;
456+ ffl.bSubtractBondNonBond = false ;
457+ bNonBonded = false ;
458+ bConstrains = true ;
459+ bFreeEnergyCalc = true ;
460+ bDeterministicTDrive = false ;
461+ bGopt = true ;
462+ go.gamma_damp = 1.0 / (dt * tdamp);
463+ go.T_target = T;
464+ go.bExploring = true ;
465+
466+ std::vector<double > lamda (nbStep);
467+ const double d_lamda = (lamda2 - lamda1) / (double )(nbStep - 1 );
468+ for (int i=0 ; i<nbStep; i++){ lamda[i] = lamda1 + d_lamda*(double )i; }
469+
470+ std::vector<std::vector<double >> Energy ( nbStep, std::vector<double >(nMDsteps, 0.0 ) );
471+ std::vector<std::vector<double >> dE_dLamda ( nbStep, std::vector<double >(nMDsteps, 0.0 ) );
472+ std::vector<std::vector<double >> dist ( nbStep, std::vector<double >(nMDsteps, 0.0 ) );
473+ std::vector<std::vector<double >> dist_ee ( nbStep, std::vector<double >(nMDsteps, 0.0 ) );
474+ std::vector<double > TI (nbStep,0.0 ), sigmaTI (nbStep,0.0 ), Ref (nbStep,0.0 );
475+ std::vector<double > avgF_x (nbStep,0.0 ), avgF_y (nbStep,0.0 ), avgF_z (nbStep,0.0 ), avgF_w (nbStep,0.0 );
476+
477+ const DistConstr cv0 = constrs.bonds [ dc[0 ] ];
478+ const int nBatches = (nbStep + nSystems - 1 ) / nSystems;
479+ const int nExplore_bak = go.nExplore ;
480+ const int nRelax_bak = go.nRelax ;
481+ go.nExplore = nEQsteps + nMDsteps + 1024 ;
482+ go.nRelax = 0 ;
483+
484+ for (int batch=0 ; batch<nBatches; batch++){
485+ resetTIBatchState (true );
486+ for (int isys=0 ; isys<nSystems; isys++){
487+ const int il = isys + batch*nSystems;
488+ if (il >= nbStep){
489+ TDrive[isys] = Quat4f{0 .0f ,-1 .0f ,0 .0f ,0 .0f };
490+ clearSystemGPUConstraints (isys);
491+ continue ;
492+ }
493+ const double lambda_abs = lamda[il];
494+ setupTIconstraints_debug ( isys, lambda_abs, n, dc );
495+ TDrive[isys] = Quat4f{ (float )T, (float )go.gamma_damp , -1 .0f , randf (-1.0 ,1.0 ) };
496+ if (gopts){
497+ gopts[isys].copy (go);
498+ gopts[isys].istep = 0 ;
499+ gopts[isys].bExploring = true ;
500+ }
501+ }
502+ int err = 0 ;
503+ err |= ocl.upload ( ocl.ibuff_constr , constr );
504+ err |= ocl.upload ( ocl.ibuff_constrK , constrK );
505+ err |= ocl.upload ( ocl.ibuff_TDrive , TDrive );
506+ err |= ocl.finishRaw ();
507+ OCL_checkError (err, " entropic_spring_TI_gpu_debug().uploadWindow" );
508+
509+ if (nEQsteps > 0 ){ run_ocl_TI_debug ( nEQsteps, dt, T, go.gamma_damp ); }
510+
511+ for (int istep=0 ; istep<nMDsteps; istep++){
512+ for (int isys=0 ; isys<nSystems; isys++){ averageForces[isys] = Quat4fZero; }
513+ err = 0 ;
514+ err |= ocl.upload ( ocl.ibuff_averageForces , averageForces );
515+ err |= ocl.finishRaw ();
516+ OCL_checkError (err, " entropic_spring_TI_gpu_debug().zeroAverageForces" );
517+
518+ run_ocl_TI_debug ( 1 , dt, T, go.gamma_damp );
519+ err = 0 ;
520+ err |= ocl.download ( ocl.ibuff_averageForces , averageForces );
521+ err |= ocl.download ( ocl.ibuff_atoms , atoms );
522+ err |= ocl.finishRaw ();
523+ OCL_checkError (err, " entropic_spring_TI_gpu_debug().downloadStepData" );
524+
525+ for (int isys=0 ; isys<nSystems; isys++){
526+ const int il = isys + batch*nSystems;
527+ if (il >= nbStep) continue ;
528+ const double lambda_abs = lamda[il];
529+ for (int i=0 ; i<n; i++){ constrs.bonds [dc[i]].ls .set ( lambda_abs / (double )n ); }
530+ unpack_system ( isys, ffls[isys], false , false );
531+ ffl.copyOf ( ffls[isys] );
532+ Energy [il][istep] = NAN;
533+ dE_dLamda[il][istep] = 0.5 * ( (double )averageForces[isys].z - (double )averageForces[isys].w );
534+ avgF_x[il] += averageForces[isys].x ;
535+ avgF_y[il] += averageForces[isys].y ;
536+ avgF_z[il] += averageForces[isys].z ;
537+ avgF_w[il] += averageForces[isys].w ; }
538+ }
539+ }
540+
541+ thermodynamic_integration ( nbStep, nMDsteps, d_lamda, dE_dLamda.data (), TI.data (), sigmaTI.data () );
542+ for (int L=1 ; L<nbStep; L++){
543+ double lamda_bar = 0.0 ;
544+ for (int i=0 ; i<nMDsteps; i++){ lamda_bar += dist[L][i]; }
545+ lamda_bar /= (double )nMDsteps;
546+ const double constant = 3.0 * const_kB * T / ( lamda_bar*lamda_bar * (double )(ffl.natoms - 1 ) );
547+ Ref[L] = Ref[L-1 ] + 0.5 * constant * ( lamda[L] + lamda[L-1 ] ) * d_lamda;
548+ }
549+ store_TI ( " results/TI_plot_ES_GPU_DEBUG.dat" , lamda, TI, sigmaTI, Ref );
550+
551+ go.nExplore = nExplore_bak;
552+ go.nRelax = nRelax_bak;
553+ for (int i=0 ; i<n; i++){ constrs.bonds [dc[i]].ls = ls_bak[i]; }
554+ ffl.doAngles = prev_doAngles;
555+ ffl.doPiPiI = prev_doPiPiI;
556+ ffl.doPiPiT = prev_doPiPiT;
557+ ffl.doPiSigma = prev_doPiSigma;
558+ ffl.doBonds = prev_doBonds;
559+ ffl.bSubtractBondNonBond = prev_subBondNonBond;
560+ bNonBonded = prev_bNonBonded;
561+ bConstrains = prev_bConstrains;
562+ bFreeEnergyCalc = prev_bFreeEnergyCalc;
563+ bDeterministicTDrive = prev_bDeterministic;
564+ bGopt = prev_bGopt;
565+ go.gamma_damp = prev_gamma;
566+ go.T_target = prev_T;
567+ go.bExploring = prev_goExploring;
568+ return TI[nbStep-1 ];
569+ }
570+
353571
354572 void setupTIConstraints ( int isys, float lambda, int nCVs, Vec3f* initial_positions, Vec3f* final_positions, bool bAlign=false , float K=10 .0f ){
355573 const int i0a = isys * ocl.nAtoms ;
0 commit comments