88
99#include < thread>
1010#include < chrono>
11+ #include < vector>
1112
1213// ============ Global Variables
1314
@@ -20,6 +21,106 @@ MolWorld_sp3_multi W;
2021
2122extern " C" {
2223
24+ static void assembleMMFFforcesFromRecoil (){
25+ const int nvecTot = W.ocl .nvecs * W.nSystems ;
26+ const int nbkTot = W.ocl .nbkng * W.nSystems ;
27+ static std::vector<Quat4f> neighForce;
28+ neighForce.resize (nbkTot);
29+
30+ int err=0 ;
31+ err |= W.ocl .download ( W.ocl .ibuff_aforces , W.aforces );
32+ err |= W.ocl .download ( W.ocl .ibuff_neighForce , neighForce.data () );
33+ err |= W.ocl .finishRaw ();
34+ OCL_checkError (err, " assembleMMFFforcesFromRecoil().download" );
35+
36+ for (int i=0 ; i<nvecTot; i++){
37+ Quat4f fe = W.aforces [i];
38+ const Quat4i ngs = W.bkNeighs [i];
39+ if (ngs.x >=0 ){ const Quat4f& q = neighForce[ngs.x ]; fe.x +=q.x ; fe.y +=q.y ; fe.z +=q.z ; fe.w +=q.w ; }
40+ if (ngs.y >=0 ){ const Quat4f& q = neighForce[ngs.y ]; fe.x +=q.x ; fe.y +=q.y ; fe.z +=q.z ; fe.w +=q.w ; }
41+ if (ngs.z >=0 ){ const Quat4f& q = neighForce[ngs.z ]; fe.x +=q.x ; fe.y +=q.y ; fe.z +=q.z ; fe.w +=q.w ; }
42+ if (ngs.w >=0 ){ const Quat4f& q = neighForce[ngs.w ]; fe.x +=q.x ; fe.y +=q.y ; fe.z +=q.z ; fe.w +=q.w ; }
43+ W.aforces [i] = fe;
44+ }
45+ }
46+
47+ static void copySystemGeometryToFF4 ( int isys ){
48+ const int i0v = isys * W.ocl .nvecs ;
49+ for (int i=0 ; i<W.ff4 .nvecs ; i++){
50+ W.ff4 .apos [i] = W.atoms [i0v+i];
51+ }
52+
53+ Mat3d lvec;
54+ Mat3_from_cl ( lvec, W.lvecs [isys] );
55+ W.ff4 .setLvec ( (Mat3f)lvec );
56+ W.ff4 .makeNeighCells ( W.bPBC ? W.nPBC : Vec3iZero );
57+ }
58+
59+ static void packMMFFforcesToGPUBuffer ( int isys, MMFFf4& ff ){
60+ const int i0v = isys * W.ocl .nvecs ;
61+ for (int i=0 ; i<ff.nvecs ; i++){
62+ W.aforces [i0v+i] = ff.fapos [i];
63+ }
64+ }
65+
66+ static void setupTIConstraintsBatch_ ( int nCVs, Vec3f* initial_positions, Vec3f* final_positions, int nLambda, int batch ){
67+ const Quat4f no_constr = Quat4f{0 .0f , 0 .0f , 0 .0f , -1 .0f };
68+ const Quat4f no_constrK = Quat4fZero;
69+ for (int isys=0 ; isys<W.nSystems ; isys++){
70+ const int i0a = isys * W.ocl .nAtoms ;
71+ for (int ia=0 ; ia<W.ocl .nAtoms ; ia++){
72+ W.constr [i0a + ia] = no_constr;
73+ W.constrK [i0a + ia] = no_constrK;
74+ }
75+ const int il = isys + batch*W.nSystems ;
76+ if (il >= nLambda) continue ;
77+ const float lambda = (nLambda > 1 ) ? ((float )il / (float )(nLambda - 1 )) : 0 .0f ;
78+
79+ int si_count = 0 ;
80+ for (int ia=0 ; ia<W.ffls [isys].natoms ; ia++){
81+ if (W.ffls [isys].atypes [ia] != W.params .getAtomType (" Si" )) continue ;
82+ if (si_count < nCVs){
83+ Quat4f acon = Quat4f{0 .0f ,0 .0f ,0 .0f ,0 .0f };
84+ acon.f .set_sub (final_positions[si_count], initial_positions[si_count]);
85+ acon.f .mul (lambda);
86+ acon.f .add (initial_positions[si_count]);
87+ acon.w = 1e6f * (float )(si_count % 2 + 1 );
88+
89+ Quat4f aconK = Quat4f{0 .0f ,0 .0f ,0 .0f , acon.w };
90+ aconK.f .set_sub (final_positions[si_count], initial_positions[si_count]);
91+
92+ W.constr [isys*W.ocl .nAtoms + ia] = acon;
93+ W.constrK [isys*W.ocl .nAtoms + ia] = aconK;
94+ }
95+ si_count++;
96+ }
97+ }
98+ int err = 0 ;
99+ err |= W.ocl .upload ( W.ocl .ibuff_constr , W.constr );
100+ err |= W.ocl .upload ( W.ocl .ibuff_constrK , W.constrK );
101+ err |= W.ocl .finishRaw ();
102+ OCL_checkError (err, " setupTIConstraintsBatch_().upload" );
103+ }
104+
105+ static void zeroTIAverageForces_ (){
106+ for (int isys=0 ; isys<W.nSystems ; isys++) W.averageForces [isys] = Quat4fZero;
107+ int err = 0 ;
108+ err |= W.ocl .upload ( W.ocl .ibuff_averageForces , W.averageForces );
109+ err |= W.ocl .finishRaw ();
110+ OCL_checkError (err, " zeroTIAverageForces_().upload" );
111+ }
112+
113+ static void downloadTIAverageForces_ (){
114+ int err = 0 ;
115+ err |= W.ocl .download ( W.ocl .ibuff_averageForces , W.averageForces );
116+ err |= W.ocl .finishRaw ();
117+ OCL_checkError (err, " downloadTIAverageForces_().download" );
118+ }
119+
120+ static void resetTIBatchState_ (){
121+ W.resetTIBatchState (true );
122+ }
123+
23124void init_buffers (){
24125 buffers .insert ( { " apos" , (double *)W.nbmol .apos } );
25126 buffers .insert ( { " fapos" , (double *)W.nbmol .fapos } );
@@ -48,6 +149,8 @@ void init_buffers(){
48149 fbuffers.insert ( { " gpu_aforces" , (float *)W.aforces } );
49150 fbuffers.insert ( { " gpu_avel" , (float *)W.avel } );
50151 fbuffers.insert ( { " gpu_constr" , (float *)W.constr } );
152+ fbuffers.insert ( { " gpu_constrK" , (float *)W.constrK } );
153+ fbuffers.insert ( { " gpu_averageForces" , (float *)W.averageForces } );
51154
52155 fbuffers.insert ( { " gpu_REQs" , (float *)W.REQs } );
53156 fbuffers.insert ( { " gpu_MMpars" , (float *)W.MMpars } );
@@ -132,13 +235,65 @@ int run( int nstepMax, double dt, double Fconv, int ialg, double* outE, double*
132235 // return W.run(nstepMax,dt,Fconv,ialg,outE,outF);
133236}
134237
238+ void eval_getMMFFf4_ocl (){
239+ if ( W.task_MMFF == 0 ) W.setup_MMFFf4_ocl ();
240+ int err=0 ;
241+ err |= W.task_cleanF ->enque_raw ();
242+ err |= W.task_MMFF ->enque_raw ();
243+ err |= W.ocl .finishRaw ();
244+ OCL_checkError (err, " eval_getMMFFf4_ocl" );
245+ assembleMMFFforcesFromRecoil ();
246+ }
247+
248+ void eval_getMMFFf4_cpu (){
249+ W.download ( false , false );
250+ for (int isys=0 ; isys<W.nSystems ; isys++){
251+ copySystemGeometryToFF4 ( isys );
252+ W.ff4 .eval ();
253+ packMMFFforcesToGPUBuffer ( isys, W.ff4 );
254+ }
255+ }
256+
135257void MDloop ( int perframe, double Ftol = -1 , int iParalel = 3 , int perVF = 100 ){
136258 W.iParalel = iParalel;
137259 W.nPerVFs = perVF;
138260 W.iterPerFrame = perframe;
139261 W.MDloop ( perframe, Ftol );
140262}
141263
264+ void setupTIConstraintsBatch ( int nCVs, float * initial_positions, float * final_positions, int nLambda, int batch ){
265+ setupTIConstraintsBatch_ ( nCVs, (Vec3f*)initial_positions, (Vec3f*)final_positions, nLambda, batch );
266+ }
267+
268+ void zeroTIAverageForces (){
269+ zeroTIAverageForces_ ();
270+ }
271+
272+ void downloadTIAverageForces (){
273+ downloadTIAverageForces_ ();
274+ }
275+
276+ void resetTIBatchState (){
277+ resetTIBatchState_ ();
278+ }
279+
280+ void getTIHostEstimator ( int isys, int nLambda, int nMDsteps, double * out ){
281+ const int nProdStepsPerLambda = nMDsteps / nLambda;
282+ const double force_scale = 1.0 *nLambda / (double )(nMDsteps);
283+ const double force_sum = W.averageForces [isys].z + W.averageForces [isys].w ;
284+ const double dE = -force_sum * force_scale;
285+ const double mean_sq = W.averageForces [isys].x * force_scale;
286+ const double var = mean_sq - dE*dE;
287+ const double SD = sqrt (fabs (var));
288+ const double SEM = SD / sqrt ((double )nProdStepsPerLambda);
289+ out[0 ] = force_sum;
290+ out[1 ] = dE;
291+ out[2 ] = mean_sq;
292+ out[3 ] = SEM;
293+ out[4 ] = force_scale;
294+ out[5 ] = (double )nProdStepsPerLambda;
295+ }
296+
142297void set_opt (
143298 double dt_max, double dt_min, double damp_max,
144299 double finc, double fdec, double falpha, int minLastNeg,
@@ -329,8 +484,18 @@ void setSwitches_multi( int CheckInvariants, int PBC, int NonBonded, int MMFF, i
329484}
330485
331486
332- double computeFreeEnergy (int nCVs, float * initial_positions, float * final_positions, int nLambda, int nMDsteps, int nEQsteps, double Fconv, int mode, double JEforceconst){
333- return W.computeFreeEnergy (nCVs, (Vec3f*)initial_positions, (Vec3f*)final_positions, nLambda, nMDsteps, nEQsteps, Fconv, mode, JEforceconst);
487+ void setConstraints ( int hardAtoms, int softAtoms, int hardDist, int softDist, int initial ){
488+ #define _setbool (b,i ) { if (i>0 ){b=true ;}else if (i<0 ){b=false ;} }
489+ _setbool ( W.bHardConstrainedAtoms , hardAtoms );
490+ _setbool ( W.bSoftConstrainedAtoms , softAtoms );
491+ _setbool ( W.bHardConstrainedDistance , hardDist );
492+ _setbool ( W.bSoftConstrainedDistance , softDist );
493+ _setbool ( W.initial , initial );
494+ #undef _setbool
495+ }
496+
497+ double computeFreeEnergy (int nCVs, float * initial_positions, float * final_positions, int nLambda, int nMDsteps, int nEQsteps, double Fconv, int mode, double K, int hardAtoms, int softAtoms, int hardDist, int softDist){
498+ return W.computeFreeEnergy (nCVs, (Vec3f*)initial_positions, (Vec3f*)final_positions, nLambda, nMDsteps, nEQsteps, Fconv, mode, K, hardAtoms, softAtoms, hardDist, softDist);
334499}
335500
336501} // extern "C"
0 commit comments