From 36e4b066d973036c84d34ff30dc57da7e007ad3b Mon Sep 17 00:00:00 2001
From: Nils Strand <nstrand@midway3-login3.rcc.local>
Date: Tue, 2 Jul 2024 11:22:54 -0500
Subject: [PATCH 01/26] Add ttsketch module to PLUMED (TT-MetaD implementation)

some implementation progress

minor

minor

more progress

function declarations in ttsketch class

minor

change include orders

added keys and initializations to constructor

progress on plumed-related functions

all bare functionalities implemented (without parallelization)--compilation not attempted yet

edit .gitignore

Delete .vscode directory

replace eigen with internal plumed matrix functionality

few fixes

minor

typo

fix compilation errors

fix warnings

fix CONV flag related issue

change STRIDE to SAMPLESTRIDE

small bug

another small bug

debugging prints

use cout instead

gsl eps bug

more cout statements

flush stdout

small bug in update

minor

add couts back

add prints in parasketch

add prints in createttcoeff

reduce number of prints

clean prints

bugs with bias derivative and vshift

some more small fixes, plus more prints for debugging purposes

minor

comment out prints

add some extra debugging prints for visualizing bias potential

need to enable convolution immediately after sketch algorithm

minor

minor

add count feature

typo

bug fix

modify location of count print

another small bug

still more related bugs

small fix--hopefully works now

another tiny fix

fixes related to vshift

minor

prints in histogram file

revert prints

clean up

additional output regarding gradtop

widen integration limits for Gaussian smoothing

bug with min and max CV values

typo

further fix Gaussian integration if variables are periodic

minor

debug 4D case

minor

less output

oops

output in envi R part

more refined output

more print adjustments

omit details from output

more output changes

fix bug related to rank

revert changes

switch back to new version

get rid of vbias dumping

change integration limits

update default quadrature params

minor

several changes; need to improve ACA algorithm

minor

massive changes

some more progress

further progress

ready to test code

compilation fixes

small fixes

minor

add parallelization

small output change

debugging output

minor bug fix

more debugging

minor

another fix

yet more debugging

more output

even more output

another fix

more prints

another fix?

minor

idk why gsl is failing

fixed issue with epsabs

that didnt fix the problem; lets examine the integrand function more closely

output integrand more finely

play around with key

try key=4 now

try relaxing tolerance

try epsabs=1e-7

change back to 1e-8, see if other things can cause improvement

try qags integration

switch to 1e-6

try decreasing epsrel

change epsabs too

will use plumed input file to tune params

disable gsl error handle

add include

new thing to debug

issue with update vb again

minor

debuggging prints

i think its fixed now

remove debugging prints

write vbias to separate file instead of stdout

temporarily disable omp

uncomment omp lines

separate gsl workspace for each thread

vbias append to file

minor

typo

ok should work now

output vbias with original basis

small fix

fix issue with vshift

minor clean up

modifications in vbias

fix minor issues

minor

change the way vbias is outputted

another version

revert to old version

minor

comment out vbias printing

try slight modification in f

added h5 support+write TT to disk

output arg labels to file

debug outputting

change prints

this should fix it

also need to store samples

minor fixes

minor

resolve bug--can only output ITensor objects to HDF5

typo

another typo

minor

yet another small fix

more debugging output

add more debugging statemement

should hopefully fix the problem

hangs--add print statements again

this should do the trick

different bug fix

clean up

change yet again to new version, include append change

add some log output

minor

nsamples should start from 0 instead of 1

fix potential indexing problem

minor

might not need in the end

progress on fes code

a bit more progress

first attempt implemented

fix compilation issues

issue with inheritance

try multiple inheritance

some more fix attempts

minor

add component

debugging output

some more debugging prints

typo

should not use default getStride()

oops

forgot to add key

fixed--remove debugging output

free energy must do -ktlog

minor

fix input stuff

minor

maybe better now?

small fix

minor

try update instead of calculate

minor

revert to old version

don't use all samples if not using final vb

missing comma

a few changes

add some debugging output

add parallelization

add include

remove output

modify f, i think vbias 0 introduces errors

change back

test disabling omp when iterating through samples

never mind

try nomax version

change f again

move parsing of keys elsewhere

print stuff to file to fix segfault

semicolon

dont check filename unless restart

minor

remove output stuff

fix file input

one more fix

confused about error--add some prints

hopefully this fixes it

minor

big revert back to old formalism

small fixes

add macros

move implementation to cpp file

commit cpp file

cpp not cc

bug with wt

try adding support for multiple walkers

fix minor issue

some improvements in restart part

debugging output

some more output

bug fix?

change output again

lets see what all_traj looks like

this should do it

change output back

one more fix

ok now it should work

minor

somethings wrong--add debugging output

minor

output to log instead of stdout

flush more often

changes to output

more detailed output about vector sizes

some more output

still more output

something is wrong with h5 write

output to cerr instead

serialize file read

write to file

minor

small fix

confusing

minor

exrta output

small

try using gdb

print to log instead

temporarily disable ttWrite and ttRead

typo

uncomment write

print contents of tt

move barrier

fix

typo

try putting h5 in sub directory

temporarily comment out h5_write

now comment out close

commenting out close(f) fixes it?

no real need for barriers

comment out Vbias printing

bug fix

make WIDTH a vector

added ability to output adjusted vbias with smaller values

bug in adj bias

more fixing

typo

forgot to multiply adj vmax by kt

fix bug

try new adj vbias version

many changes, including trimming now before linsolve, and added covmat calculations

some compilation fixes

this is better maybe?

flush log a few times

add a bunch of debugging prints

additional prints

possible fix

somethings wrong with Pinv?

more output

cant matrixout to cout

print vbias for aladip

small bug

check mu

quick check

minor

some clean up

comment out printing of vbias

print integrals

print tt

i think this fixes integrals

clean up

typo

fix bug with empirical cov

implementation attempt for fast aca

compilation fixes

minor

a few more small changes

shift to -2kT

separate conv for vb and g

minor

improvements to quadrature grid

minor

make grid finer

comment out printing of F

minor

minor output tweaks

never mind

add option to not include all samples in aca calculations

add feature to print ttcross errors

minor

another small fix

typo

try reducing number of grid points

bug in l2 error calculations

need to print some stuff

need to take sqrt

try 5x instead of 2x

shift f from -2kT to 0

minor

dont compute vshift on restart if aca

some clean up

forgot to add samples to aca object when restarting

if error, decrease count by 1

need to check for errors multiple times

needed to check reading of tt as well

major changes--mainly memorystride and vshift

minor

small fix

1-indexing

minor

fix gradient filenames

further fix io related stuff

only barrier if multiple walkers

some debugging output

bug fix

6d bias output for debugging

minor

finer print

change limits of integration for convolution

remove vbias output

uncomment vshift output

more vbias output

comment out output

enable output

small fix

comment out output

output for 3d and 4d bias

minor

comment out vbias output

something is wrong with restart, adding some debugging output

change location of output

somehow it works

minor output

flush output

some more output

add barrier maybe

more output

more changes to output

minor

maybe fix?

minor

this should do it

add vbias output for 6d calculations

adj vbias

minor

vbias output for aladip

all ranks should output vbias

minor

change position of vbias output

also print adj vbias

minor

fix restart portion

comment out vbias printing

slight change with adj vmax

vbias is now sum of log ttsketch again

bug fix

printing of 2d marginals

typo

debugging output

minor

typo

more output

more output

refined output

should fix

filename location change

filename change

small clean up

fix the way vshift and adj vshift is handled on restart

change convergence scheme for aca

oops

when aca, search through grid samples

minor revert

another small revert

never mind

small fix

another small fix

print additional files that have gaussian kernels included

also print 2d vbias

typos

move print code to right placee

modify metad file to print vbias cross sections

vscode is being weird

include fstream

clean up mtd

minor

only print vbias stuff for ala4

fix ttcross convergence check

minor

typo

somethings wrong

undo some small changes

confused

another fix attempt

minor

remove extra output again

oops

rearrange code

re rearrange

revert cross to fix issue

add erased function

small fix

big changes--revert ttcross and implement gaussian basis

fix redundant declaration

fix ttsketch conv output

gaussian marginals implementation

small fix+clean up

fix 2d marginal output + more gaussian basis clean up

minor fix

should be able to run with gaussian basis now

fix some more output

accidentally removed code

something is wrong

this should do it

minor

added feature to compute l2 errors for each aca rank

fix errors

more small fixes

set upper limit on aca rank

invert/pseudoinvert fix

add debugging output

indexing seems to be off by 1 in some places

comment out prints

rank scheme redundance--simplify

check if res is 0 and if so break

add a lot of debugging output for troubleshooting kernel basis

fix

only mpi rank 0 should print

rearrange some code

minor

also need to only print for one k

move code around

some more output

oops

nbasis fix

typo

another typo?

exclude redundant gaussian

int2 corrections

try pseudoinverse?

multiply by ginv after cross too

change res new criterion

sump p implementation

small fix

typo in keyword registering

initialize sump height

forgot to write vb to disk

some debugging output

oops

print tt

see if nonzero ranks are trying to read prematurely

try adding barrier right after ttsumwrite

move variable declarations around

lets try something

need to append to h5 file

something is wrong with pluseq--debug

match site indices

minor

add some more useful output about ranks

oops

forgot to account for vshift

oops

try diff aca version

debugging output

more debugging

oops

more detailed output

this should fix it

minor

revert to old aca

minor

output info from ala3 vbias, sketch and mtd

include fstream

some more fixes

comment out metad code

check quality of ginv

correct matrix mult

fix output

oops

try increasing kernel basis bandwidth

remove bandwidth increase

clean up

vshift got excluded from sump vbias for some reason

comment out some vbias printing code

fix bf output for sump

minor fix

add vbias printing for 4d

comment out mtd code

slight tweak in cross convergence

slightly more

revert

cross change seems to be problematic

try compiling ttmetad

fix issues

one more small thing

some more small changes

oops

some debugging output

fix

oops

cant print vectors

try again

minor

another possible fix

even more debugging output

minor typo

test with delta

oops

test something

go back to gaussians

hard code rank

try fourier basis

clean up

use gaussians instead of dirac delta for sump

typo

fix output

can still check empirical stats

try printing error info

minor

test with dirac again

go back to gaussian

try smoothing with tiny bandwidth

separate bandwidth for gaussian sum

sigma needs to be a vector

minor fix

this should likely fix

indexing oops

debugging output

oops

segfault fix

one more fix

test hard code false

fix ttmetad

small fixes

now force true

change back

small fix

try analytical formula for conv

typo

compare two methods

oops

missing factor

typo

never mind

damn integer division

remove all numerical integration

minor fix

freeze bias potential after some time

typo

small changes

minor

minor

add vbias outputting for 3d bias

check if vbias is the same across walkers

undo

oops

some more useful output

undo again

output metad vbias

also rct

clean up

minor

comment out code instead

minor

lots of metad testing

clean up

frozen param redundant

oops

clean up once again

oops again

clean up

actually keep modifications a bit longer

set up non mw

clean again

multithreading added

minor

minor

this is better

debugging output

typo

more test

oops

more detailed test

even more detail

this should fix it

multithreading for ttcross

fix

minor

minor

minor

comment out vbias output

output for 6d bias

minor

comment out temporarily

print metad bias for each walker every now and then

barrier

one more barrier

one more thing

comment out

6d vbias output for mtd

change output temporarily

undo

improved gaussian sketch

fixes

minor

slight restructuring

typo

another fix

some more debugging output

yet another fix

minor

clean up

minor

vbias output for 6d

oops

clean up

bug with kernel width

add restart option

temporarily comment out metad output

add back

oops

change pace

better randomization

corrections

minor

more small changes

seed rng

better seeding

clean up

clean up

major clean up

add stopwatch

minor

try kernel basis with ttmetad

oops

oops again

should not sketch vb with ginv

temporarily uncomment vbias printing

comment again

improvements in kernel sketching

print vbias

clean up

improvement in aca

minor

improved quadrature

oops

vbias printing again

revert nbins

improvements in sample choice for aca

add pivot I/O

should fix issue

minor

oops

clean

minor improvement to ttsketch

minor

another tweak

minor

some restart related tweaks

minor

add option to use diff basis for aca than sketch

oops

typo

major changes to ttsketch

minor fixes

oops

initialize files earlier

try closing file before opening again

oops

reset each count

flush and close?

some debugging

minor

more detailed debugging output

rearrange code

some clean up

flush and close

more changes

minor

several other changes

minor fix

tiny fix

more improvements

weird error

this should fix it

improvements to restart part

one more thing in restart

minor

revert ttcross

need to initialize value ptrs

debugging output

empty traj if restarting from step 0

never mind

stopwatch fix

fix

clean up

file I/O for TTMetaD

typo

minor

kernel smoothing option for TTMetaD

fixes

only rank 0 process should write to file

minor

try rewind file

this should fix?

this should be better

oops

minor

roll back

fix ttread

debugging ttcross

some more debugging

minor fix

better version

lots of debugging output

some more output

oops

try rewind?

slight improvement

further improvements to file io

minor fix

clean up + nbasis for ttcross

minor

minor

fix file io

debugging output

oops

more detailed output

same oops

ugh

fix

revert

this is best i think

minor

enable vbias printing for ala4

option to specify kernel basis width

minor fix

vbias printing for aladip

oops

minor

oops

silly error

disable output

stopwatch stuff

minor

stopwatch fix

experiment with no gaussians in getbias

temporarily uncomment

fix A0

oops

indexing issue

revert

prepare for aladip

enable stopwatch

initial ttopes implementation

fixes

minor

output vbias during opes

add include

vbias output for ttopes

need to register outputs

debugging output

diff output

more refined intbasis output

some traj output

fix?

clean up

print density

fix

add grad output

minor

minor

nonintrusive version implemented

fix segfault?

debugging output

matching index fix attempt

more debugging output

this should do it?

wrong function names

fix A matrices too

1 indexed?

vbias printing for ala4

non negative least squares attempt

namespace clash

minor fixes

oops

minor

debugging output

more debugging

fix

more output

refined debugging

another fix

check matrices

oops

output extra info

minor

small fix

print nnls info

oops

typo

more detailed output

try positive sketch

try ommitting ginv

try least squares without positivity constraint

declaration issue

oops

least squares using full matrices

fixes

minor

try this?

should be good now

ok now

hmm

small revert

clean up

minor

regularize gram matrix

added clipping

pseudoinverse of ginv

try regularizing ginv

adjust lambda

try smaller lambda now

revert to 1e-6

revert to pinv for ginv and solve ls for first core instead

oops

minor

try fourier

revert

oops

try kernel without gram

revert again

try slightly different scheme

try fourier again

correct bias based on gradient magnitude

change grad threshold

1e3

try 100

revert

bias needs to be something else at the start

1e4

1e3

revert again

output some bias and gradient info

step too

typo

new p threshold

1e-3

oops

minor

set up ttmetad branch

itensor config file

minor

started cleanup

comments added

copyright stuff

ttmetad regtest

ran regtest

minor

Update src/ttsketch/TTMetaD.cpp

assign instead of resize

Co-authored-by: Daniele <Iximiel@users.noreply.github.com>
---
 .gitignore                                   |    3 +-
 regtest/.gitignore                           |    2 +-
 regtest/ttsketch/Makefile                    |    1 +
 regtest/ttsketch/rt-ttmetad/COLVAR.reference |   24 +
 regtest/ttsketch/rt-ttmetad/HILLS.reference  |    4 +
 regtest/ttsketch/rt-ttmetad/Makefile         |    1 +
 regtest/ttsketch/rt-ttmetad/config           |    3 +
 regtest/ttsketch/rt-ttmetad/plumed.dat       |   22 +
 regtest/ttsketch/rt-ttmetad/traj.gro         |  525 +++++++++
 regtest/ttsketch/rt-ttmetad/ttsketch.h5      |  Bin 0 -> 66008 bytes
 src/.gitignore                               |    1 +
 src/ttsketch/.gitignore                      |   11 +
 src/ttsketch/BasisFunc.cpp                   |  301 +++++
 src/ttsketch/BasisFunc.h                     |   78 ++
 src/ttsketch/COPYRIGHT                       |   17 +
 src/ttsketch/Makefile                        |    3 +
 src/ttsketch/TTHelper.cpp                    |  179 +++
 src/ttsketch/TTHelper.h                      |   39 +
 src/ttsketch/TTMetaD.cpp                     | 1088 ++++++++++++++++++
 src/ttsketch/module.type                     |    1 +
 src/ttsketch/options.mk                      |  221 ++++
 21 files changed, 2522 insertions(+), 2 deletions(-)
 create mode 100644 regtest/ttsketch/Makefile
 create mode 100644 regtest/ttsketch/rt-ttmetad/COLVAR.reference
 create mode 100644 regtest/ttsketch/rt-ttmetad/HILLS.reference
 create mode 100644 regtest/ttsketch/rt-ttmetad/Makefile
 create mode 100644 regtest/ttsketch/rt-ttmetad/config
 create mode 100644 regtest/ttsketch/rt-ttmetad/plumed.dat
 create mode 100644 regtest/ttsketch/rt-ttmetad/traj.gro
 create mode 100644 regtest/ttsketch/rt-ttmetad/ttsketch.h5
 create mode 100644 src/ttsketch/.gitignore
 create mode 100644 src/ttsketch/BasisFunc.cpp
 create mode 100644 src/ttsketch/BasisFunc.h
 create mode 100644 src/ttsketch/COPYRIGHT
 create mode 100644 src/ttsketch/Makefile
 create mode 100644 src/ttsketch/TTHelper.cpp
 create mode 100644 src/ttsketch/TTHelper.h
 create mode 100644 src/ttsketch/TTMetaD.cpp
 create mode 100644 src/ttsketch/module.type
 create mode 100644 src/ttsketch/options.mk

diff --git a/.gitignore b/.gitignore
index bbbaf77567..11b98c4ac8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,4 +27,5 @@ makefile.dep
 __pycache__
 #to not export personal pyenv settings
 .python-version
-
+.vscode
+.claude
diff --git a/regtest/.gitignore b/regtest/.gitignore
index c099f60447..2246e3a5dd 100644
--- a/regtest/.gitignore
+++ b/regtest/.gitignore
@@ -45,7 +45,7 @@
 !/wham
 !/metatomic
 !/sizeshape
-
+!/ttsketch
 # These files we just want to ignore completely
 tmp
 report.txt
diff --git a/regtest/ttsketch/Makefile b/regtest/ttsketch/Makefile
new file mode 100644
index 0000000000..430b3123ed
--- /dev/null
+++ b/regtest/ttsketch/Makefile
@@ -0,0 +1 @@
+include ../scripts/module.make
diff --git a/regtest/ttsketch/rt-ttmetad/COLVAR.reference b/regtest/ttsketch/rt-ttmetad/COLVAR.reference
new file mode 100644
index 0000000000..0770b96765
--- /dev/null
+++ b/regtest/ttsketch/rt-ttmetad/COLVAR.reference
@@ -0,0 +1,24 @@
+#! FIELDS time phi psi tt.bias
+#! SET min_phi -pi
+#! SET max_phi pi
+ 0.000000  -1.2379   1.9470   0.0000
+ 1.000000  -1.4839   1.9381   0.0000
+ 2.000000  -1.3243   1.9663   0.8644
+ 3.000000  -1.3340   1.9885   2.0694
+ 4.000000  -1.4613   1.8901   2.9094
+ 5.000000  -1.2202   2.0306   2.8990
+ 6.000000  -1.3883   1.8776   4.7976
+ 7.000000  -1.5481   1.9488   4.5938
+ 8.000000  -1.8429   1.9025   1.0045
+ 9.000000  -2.2424   1.9813   0.1537
+ 10.000000  -1.1482   1.9684   3.9692
+ 11.000000  -1.7580   1.9807   2.4606
+ 12.000000  -1.3186   1.9486   3.4383
+ 13.000000  -2.9911   1.9547  -0.1836
+ 14.000000  -1.4112   2.0415   4.1799
+ 15.000000  -2.5995   1.9166   0.7751
+ 16.000000  -1.4608   1.9433   5.2919
+ 17.000000  -1.3791   2.0625   6.2402
+ 18.000000  -1.6771   1.9433   5.3378
+ 19.000000  -1.5241   1.9642   7.8989
+ 20.000000  -1.1997   1.9296   6.6496
diff --git a/regtest/ttsketch/rt-ttmetad/HILLS.reference b/regtest/ttsketch/rt-ttmetad/HILLS.reference
new file mode 100644
index 0000000000..45f43f3d03
--- /dev/null
+++ b/regtest/ttsketch/rt-ttmetad/HILLS.reference
@@ -0,0 +1,4 @@
+#! FIELDS time phi psi sigma_phi sigma_psi height biasf
+#! SET min_phi -pi
+#! SET max_phi pi
+   20.000000   -1.199652    1.929639    0.200000    0.200000    1.200000   -1.000000
diff --git a/regtest/ttsketch/rt-ttmetad/Makefile b/regtest/ttsketch/rt-ttmetad/Makefile
new file mode 100644
index 0000000000..3703b27cea
--- /dev/null
+++ b/regtest/ttsketch/rt-ttmetad/Makefile
@@ -0,0 +1 @@
+include ../../scripts/test.make
diff --git a/regtest/ttsketch/rt-ttmetad/config b/regtest/ttsketch/rt-ttmetad/config
new file mode 100644
index 0000000000..a7c12c885f
--- /dev/null
+++ b/regtest/ttsketch/rt-ttmetad/config
@@ -0,0 +1,3 @@
+plumed_modules=ttsketch
+type=driver
+arg="--plumed plumed.dat --trajectory-stride 500 --timestep 0.002 --igro traj.gro"
diff --git a/regtest/ttsketch/rt-ttmetad/plumed.dat b/regtest/ttsketch/rt-ttmetad/plumed.dat
new file mode 100644
index 0000000000..b31b2acb84
--- /dev/null
+++ b/regtest/ttsketch/rt-ttmetad/plumed.dat
@@ -0,0 +1,22 @@
+phi:   TORSION ATOMS=5,7,9,15 NOPBC
+psi:   ANGLE   ATOMS=7,9,15
+
+tt: TTMETAD ...
+  ARG=phi,psi
+  SIGMA=0.20,0.20
+  HEIGHT=1.20
+  PACE=500
+  TEMP=300.0
+  FILE=HILLS
+  FMT=%12.6f
+  SKETCH_RANK=2
+  SKETCH_INITRANK=4
+  SKETCH_PACE=5000
+  INTERVAL_MIN=-3.14159,0.0
+  INTERVAL_MAX=3.14159,3.14159
+  SKETCH_NBASIS=5
+  SKETCH_ALPHA=1.0
+  DETERMINISTIC
+...
+
+PRINT STRIDE=500 FILE=COLVAR ARG=phi,psi,tt.bias FMT=%8.4f
diff --git a/regtest/ttsketch/rt-ttmetad/traj.gro b/regtest/ttsketch/rt-ttmetad/traj.gro
new file mode 100644
index 0000000000..c1c387a014
--- /dev/null
+++ b/regtest/ttsketch/rt-ttmetad/traj.gro
@@ -0,0 +1,525 @@
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=   0.00000
+   22
+    1ACE   HH31    1   1.474   1.585   1.200
+    1ACE    CH3    2   1.483   1.508   1.277
+    1ACE   HH32    3   1.476   1.561   1.372
+    1ACE   HH33    4   1.578   1.455   1.278
+    1ACE      C    5   1.353   1.428   1.279
+    1ACE      O    6   1.263   1.449   1.357
+    2ALA      N    7   1.343   1.328   1.191
+    2ALA      H    8   1.415   1.321   1.120
+    2ALA     CA    9   1.233   1.239   1.159
+    2ALA     HA   10   1.144   1.302   1.155
+    2ALA     CB   11   1.244   1.182   1.013
+    2ALA    HB1   12   1.341   1.136   0.992
+    2ALA    HB2   13   1.159   1.117   0.994
+    2ALA    HB3   14   1.242   1.265   0.942
+    2ALA      C   15   1.207   1.140   1.271
+    2ALA      O   16   1.214   1.017   1.241
+    3NME      N   17   1.191   1.177   1.398
+    3NME      H   18   1.192   1.275   1.421
+    3NME    CH3   19   1.189   1.086   1.518
+    3NME   HH31   20   1.170   0.983   1.487
+    3NME   HH32   21   1.283   1.087   1.574
+    3NME   HH33   22   1.108   1.127   1.578
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=   1.00000
+   22
+    1ACE   HH31    1   1.480   1.571   1.214
+    1ACE    CH3    2   1.481   1.493   1.289
+    1ACE   HH32    3   1.502   1.528   1.390
+    1ACE   HH33    4   1.551   1.417   1.255
+    1ACE      C    5   1.344   1.432   1.275
+    1ACE      O    6   1.250   1.462   1.345
+    2ALA      N    7   1.342   1.327   1.193
+    2ALA      H    8   1.430   1.313   1.144
+    2ALA     CA    9   1.233   1.244   1.166
+    2ALA     HA   10   1.144   1.307   1.173
+    2ALA     CB   11   1.240   1.189   1.017
+    2ALA    HB1   12   1.327   1.124   1.000
+    2ALA    HB2   13   1.150   1.128   1.005
+    2ALA    HB3   14   1.251   1.267   0.941
+    2ALA      C   15   1.221   1.133   1.271
+    2ALA      O   16   1.217   1.015   1.238
+    3NME      N   17   1.204   1.174   1.395
+    3NME      H   18   1.200   1.275   1.398
+    3NME    CH3   19   1.188   1.089   1.516
+    3NME   HH31   20   1.083   1.086   1.543
+    3NME   HH32   21   1.233   0.990   1.511
+    3NME   HH33   22   1.241   1.141   1.596
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=   2.00000
+   22
+    1ACE   HH31    1   1.532   1.520   1.209
+    1ACE    CH3    2   1.478   1.493   1.300
+    1ACE   HH32    3   1.465   1.586   1.356
+    1ACE   HH33    4   1.548   1.426   1.350
+    1ACE      C    5   1.352   1.423   1.279
+    1ACE      O    6   1.252   1.461   1.340
+    2ALA      N    7   1.351   1.326   1.190
+    2ALA      H    8   1.442   1.293   1.160
+    2ALA     CA    9   1.232   1.244   1.160
+    2ALA     HA   10   1.146   1.310   1.151
+    2ALA     CB   11   1.241   1.190   1.016
+    2ALA    HB1   12   1.333   1.132   1.008
+    2ALA    HB2   13   1.160   1.123   0.986
+    2ALA    HB3   14   1.242   1.280   0.955
+    2ALA      C   15   1.203   1.138   1.270
+    2ALA      O   16   1.161   1.021   1.240
+    3NME      N   17   1.230   1.171   1.396
+    3NME      H   18   1.257   1.266   1.417
+    3NME    CH3   19   1.217   1.090   1.512
+    3NME   HH31   20   1.144   1.011   1.493
+    3NME   HH32   21   1.307   1.029   1.526
+    3NME   HH33   22   1.212   1.146   1.605
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=   3.00000
+   22
+    1ACE   HH31    1   1.439   1.582   1.175
+    1ACE    CH3    2   1.474   1.516   1.254
+    1ACE   HH32    3   1.480   1.585   1.338
+    1ACE   HH33    4   1.569   1.465   1.242
+    1ACE      C    5   1.364   1.419   1.280
+    1ACE      O    6   1.277   1.446   1.367
+    2ALA      N    7   1.358   1.323   1.194
+    2ALA      H    8   1.443   1.313   1.140
+    2ALA     CA    9   1.235   1.243   1.164
+    2ALA     HA   10   1.150   1.310   1.170
+    2ALA     CB   11   1.240   1.197   1.019
+    2ALA    HB1   12   1.316   1.119   1.016
+    2ALA    HB2   13   1.145   1.157   0.982
+    2ALA    HB3   14   1.279   1.276   0.955
+    2ALA      C   15   1.201   1.137   1.272
+    2ALA      O   16   1.172   1.021   1.232
+    3NME      N   17   1.218   1.166   1.402
+    3NME      H   18   1.240   1.259   1.434
+    3NME    CH3   19   1.186   1.086   1.518
+    3NME   HH31   20   1.225   0.984   1.527
+    3NME   HH32   21   1.193   1.134   1.616
+    3NME   HH33   22   1.081   1.058   1.509
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=   4.00000
+   22
+    1ACE   HH31    1   1.549   1.508   1.196
+    1ACE    CH3    2   1.500   1.486   1.290
+    1ACE   HH32    3   1.487   1.571   1.357
+    1ACE   HH33    4   1.563   1.415   1.343
+    1ACE      C    5   1.362   1.425   1.270
+    1ACE      O    6   1.265   1.465   1.340
+    2ALA      N    7   1.349   1.324   1.182
+    2ALA      H    8   1.432   1.287   1.138
+    2ALA     CA    9   1.221   1.249   1.168
+    2ALA     HA   10   1.138   1.318   1.184
+    2ALA     CB   11   1.201   1.194   1.025
+    2ALA    HB1   12   1.276   1.117   1.005
+    2ALA    HB2   13   1.096   1.165   1.014
+    2ALA    HB3   14   1.229   1.265   0.947
+    2ALA      C   15   1.217   1.141   1.275
+    2ALA      O   16   1.234   1.024   1.243
+    3NME      N   17   1.183   1.174   1.400
+    3NME      H   18   1.184   1.274   1.412
+    3NME    CH3   19   1.187   1.078   1.509
+    3NME   HH31   20   1.248   0.990   1.490
+    3NME   HH32   21   1.220   1.120   1.604
+    3NME   HH33   22   1.088   1.035   1.527
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=   5.00000
+   22
+    1ACE   HH31    1   1.449   1.585   1.168
+    1ACE    CH3    2   1.479   1.518   1.248
+    1ACE   HH32    3   1.523   1.577   1.328
+    1ACE   HH33    4   1.565   1.461   1.213
+    1ACE      C    5   1.364   1.422   1.284
+    1ACE      O    6   1.305   1.438   1.389
+    2ALA      N    7   1.347   1.326   1.187
+    2ALA      H    8   1.423   1.328   1.122
+    2ALA     CA    9   1.226   1.241   1.162
+    2ALA     HA   10   1.139   1.308   1.162
+    2ALA     CB   11   1.236   1.193   1.023
+    2ALA    HB1   12   1.314   1.117   1.012
+    2ALA    HB2   13   1.137   1.167   0.986
+    2ALA    HB3   14   1.273   1.278   0.966
+    2ALA      C   15   1.195   1.133   1.268
+    2ALA      O   16   1.173   1.016   1.239
+    3NME      N   17   1.204   1.175   1.393
+    3NME      H   18   1.211   1.275   1.403
+    3NME    CH3   19   1.188   1.090   1.513
+    3NME   HH31   20   1.089   1.044   1.509
+    3NME   HH32   21   1.267   1.014   1.509
+    3NME   HH33   22   1.189   1.145   1.607
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=   6.00000
+   22
+    1ACE   HH31    1   1.517   1.511   1.181
+    1ACE    CH3    2   1.490   1.488   1.284
+    1ACE   HH32    3   1.482   1.582   1.339
+    1ACE   HH33    4   1.569   1.421   1.316
+    1ACE      C    5   1.359   1.414   1.282
+    1ACE      O    6   1.272   1.447   1.358
+    2ALA      N    7   1.351   1.320   1.186
+    2ALA      H    8   1.434   1.297   1.133
+    2ALA     CA    9   1.220   1.251   1.159
+    2ALA     HA   10   1.139   1.323   1.167
+    2ALA     CB   11   1.220   1.194   1.018
+    2ALA    HB1   12   1.298   1.120   1.003
+    2ALA    HB2   13   1.120   1.158   0.994
+    2ALA    HB3   14   1.224   1.286   0.960
+    2ALA      C   15   1.201   1.139   1.270
+    2ALA      O   16   1.190   1.022   1.239
+    3NME      N   17   1.203   1.178   1.393
+    3NME      H   18   1.207   1.277   1.409
+    3NME    CH3   19   1.211   1.102   1.515
+    3NME   HH31   20   1.111   1.064   1.534
+    3NME   HH32   21   1.275   1.017   1.492
+    3NME   HH33   22   1.264   1.151   1.597
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=   7.00000
+   22
+    1ACE   HH31    1   1.483   1.590   1.185
+    1ACE    CH3    2   1.505   1.501   1.245
+    1ACE   HH32    3   1.538   1.533   1.344
+    1ACE   HH33    4   1.580   1.430   1.209
+    1ACE      C    5   1.379   1.418   1.267
+    1ACE      O    6   1.298   1.443   1.349
+    2ALA      N    7   1.360   1.320   1.187
+    2ALA      H    8   1.426   1.297   1.114
+    2ALA     CA    9   1.224   1.253   1.180
+    2ALA     HA   10   1.147   1.326   1.205
+    2ALA     CB   11   1.174   1.215   1.037
+    2ALA    HB1   12   1.245   1.152   0.983
+    2ALA    HB2   13   1.084   1.154   1.032
+    2ALA    HB3   14   1.153   1.311   0.992
+    2ALA      C   15   1.212   1.141   1.280
+    2ALA      O   16   1.219   1.022   1.244
+    3NME      N   17   1.191   1.176   1.408
+    3NME      H   18   1.207   1.275   1.423
+    3NME    CH3   19   1.162   1.068   1.509
+    3NME   HH31   20   1.229   0.982   1.503
+    3NME   HH32   21   1.162   1.109   1.610
+    3NME   HH33   22   1.056   1.044   1.499
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=   8.00000
+   22
+    1ACE   HH31    1   1.576   1.425   1.169
+    1ACE    CH3    2   1.523   1.455   1.260
+    1ACE   HH32    3   1.547   1.556   1.294
+    1ACE   HH33    4   1.566   1.393   1.338
+    1ACE      C    5   1.385   1.422   1.255
+    1ACE      O    6   1.308   1.499   1.305
+    2ALA      N    7   1.346   1.311   1.189
+    2ALA      H    8   1.419   1.248   1.159
+    2ALA     CA    9   1.210   1.260   1.193
+    2ALA     HA   10   1.139   1.326   1.242
+    2ALA     CB   11   1.152   1.252   1.051
+    2ALA    HB1   12   1.230   1.210   0.987
+    2ALA    HB2   13   1.066   1.185   1.052
+    2ALA    HB3   14   1.127   1.354   1.024
+    2ALA      C   15   1.206   1.136   1.282
+    2ALA      O   16   1.195   1.023   1.229
+    3NME      N   17   1.210   1.152   1.420
+    3NME      H   18   1.221   1.243   1.463
+    3NME    CH3   19   1.185   1.048   1.518
+    3NME   HH31   20   1.195   0.948   1.475
+    3NME   HH32   21   1.261   1.070   1.593
+    3NME   HH33   22   1.088   1.064   1.565
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=   9.00000
+   22
+    1ACE   HH31    1   1.515   1.474   1.040
+    1ACE    CH3    2   1.535   1.461   1.147
+    1ACE   HH32    3   1.561   1.560   1.184
+    1ACE   HH33    4   1.612   1.386   1.165
+    1ACE      C    5   1.406   1.430   1.217
+    1ACE      O    6   1.361   1.502   1.307
+    2ALA      N    7   1.345   1.316   1.190
+    2ALA      H    8   1.384   1.254   1.121
+    2ALA     CA    9   1.217   1.277   1.242
+    2ALA     HA   10   1.187   1.348   1.319
+    2ALA     CB   11   1.111   1.278   1.134
+    2ALA    HB1   12   1.129   1.198   1.062
+    2ALA    HB2   13   1.018   1.269   1.189
+    2ALA    HB3   14   1.121   1.372   1.079
+    2ALA      C   15   1.217   1.133   1.309
+    2ALA      O   16   1.293   1.044   1.265
+    3NME      N   17   1.132   1.116   1.408
+    3NME      H   18   1.076   1.195   1.437
+    3NME    CH3   19   1.112   1.003   1.490
+    3NME   HH31   20   1.156   0.910   1.456
+    3NME   HH32   21   1.153   1.026   1.588
+    3NME   HH33   22   1.005   0.985   1.500
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  10.00000
+   22
+    1ACE   HH31    1   1.536   1.485   1.135
+    1ACE    CH3    2   1.521   1.460   1.240
+    1ACE   HH32    3   1.528   1.557   1.289
+    1ACE   HH33    4   1.602   1.397   1.276
+    1ACE      C    5   1.385   1.401   1.261
+    1ACE      O    6   1.341   1.416   1.373
+    2ALA      N    7   1.322   1.332   1.167
+    2ALA      H    8   1.364   1.316   1.076
+    2ALA     CA    9   1.190   1.273   1.177
+    2ALA     HA   10   1.125   1.352   1.216
+    2ALA     CB   11   1.134   1.230   1.038
+    2ALA    HB1   12   1.167   1.128   1.016
+    2ALA    HB2   13   1.026   1.238   1.046
+    2ALA    HB3   14   1.174   1.287   0.953
+    2ALA      C   15   1.183   1.157   1.283
+    2ALA      O   16   1.111   1.061   1.259
+    3NME      N   17   1.264   1.157   1.396
+    3NME      H   18   1.320   1.241   1.405
+    3NME    CH3   19   1.264   1.070   1.510
+    3NME   HH31   20   1.163   1.057   1.548
+    3NME   HH32   21   1.311   0.974   1.487
+    3NME   HH33   22   1.326   1.109   1.592
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  11.00000
+   22
+    1ACE   HH31    1   1.607   1.359   1.178
+    1ACE    CH3    2   1.535   1.442   1.176
+    1ACE   HH32    3   1.521   1.470   1.072
+    1ACE   HH33    4   1.569   1.533   1.224
+    1ACE      C    5   1.402   1.412   1.242
+    1ACE      O    6   1.368   1.454   1.350
+    2ALA      N    7   1.319   1.332   1.177
+    2ALA      H    8   1.363   1.279   1.103
+    2ALA     CA    9   1.194   1.289   1.231
+    2ALA     HA   10   1.153   1.355   1.308
+    2ALA     CB   11   1.092   1.300   1.113
+    2ALA    HB1   12   1.137   1.243   1.032
+    2ALA    HB2   13   1.001   1.243   1.131
+    2ALA    HB3   14   1.070   1.405   1.095
+    2ALA      C   15   1.198   1.143   1.290
+    2ALA      O   16   1.237   1.057   1.213
+    3NME      N   17   1.171   1.126   1.422
+    3NME      H   18   1.164   1.213   1.472
+    3NME    CH3   19   1.200   1.009   1.496
+    3NME   HH31   20   1.118   0.938   1.493
+    3NME   HH32   21   1.284   0.953   1.454
+    3NME   HH33   22   1.230   1.031   1.598
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  12.00000
+   22
+    1ACE   HH31    1   1.491   1.569   1.138
+    1ACE    CH3    2   1.511   1.462   1.134
+    1ACE   HH32    3   1.614   1.449   1.166
+    1ACE   HH33    4   1.495   1.444   1.028
+    1ACE      C    5   1.416   1.394   1.229
+    1ACE      O    6   1.445   1.392   1.345
+    2ALA      N    7   1.299   1.357   1.181
+    2ALA      H    8   1.278   1.358   1.082
+    2ALA     CA    9   1.196   1.301   1.267
+    2ALA     HA   10   1.185   1.364   1.355
+    2ALA     CB   11   1.063   1.319   1.182
+    2ALA    HB1   12   1.057   1.248   1.100
+    2ALA    HB2   13   0.976   1.288   1.240
+    2ALA    HB3   14   1.037   1.423   1.161
+    2ALA      C   15   1.226   1.161   1.304
+    2ALA      O   16   1.317   1.094   1.258
+    3NME      N   17   1.135   1.110   1.387
+    3NME      H   18   1.064   1.174   1.419
+    3NME    CH3   19   1.121   0.972   1.429
+    3NME   HH31   20   1.163   0.900   1.358
+    3NME   HH32   21   1.177   0.954   1.520
+    3NME   HH33   22   1.016   0.942   1.428
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  13.00000
+   22
+    1ACE   HH31    1   1.528   1.537   1.077
+    1ACE    CH3    2   1.543   1.448   1.137
+    1ACE   HH32    3   1.634   1.463   1.195
+    1ACE   HH33    4   1.560   1.362   1.072
+    1ACE      C    5   1.421   1.428   1.229
+    1ACE      O    6   1.396   1.520   1.301
+    2ALA      N    7   1.355   1.314   1.219
+    2ALA      H    8   1.385   1.241   1.156
+    2ALA     CA    9   1.249   1.276   1.311
+    2ALA     HA   10   1.297   1.288   1.409
+    2ALA     CB   11   1.122   1.359   1.284
+    2ALA    HB1   12   1.072   1.325   1.193
+    2ALA    HB2   13   1.053   1.352   1.367
+    2ALA    HB3   14   1.153   1.463   1.275
+    2ALA      C   15   1.204   1.131   1.290
+    2ALA      O   16   1.225   1.070   1.185
+    3NME      N   17   1.131   1.077   1.382
+    3NME      H   18   1.104   1.132   1.463
+    3NME    CH3   19   1.083   0.939   1.380
+    3NME   HH31   20   1.012   0.925   1.298
+    3NME   HH32   21   1.168   0.871   1.372
+    3NME   HH33   22   1.036   0.915   1.475
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  14.00000
+   22
+    1ACE   HH31    1   1.543   1.468   1.123
+    1ACE    CH3    2   1.548   1.443   1.229
+    1ACE   HH32    3   1.566   1.540   1.276
+    1ACE   HH33    4   1.634   1.382   1.257
+    1ACE      C    5   1.420   1.385   1.285
+    1ACE      O    6   1.412   1.375   1.404
+    2ALA      N    7   1.324   1.361   1.200
+    2ALA      H    8   1.345   1.354   1.101
+    2ALA     CA    9   1.189   1.323   1.244
+    2ALA     HA   10   1.163   1.394   1.323
+    2ALA     CB   11   1.094   1.344   1.125
+    2ALA    HB1   12   1.098   1.270   1.045
+    2ALA    HB2   13   0.995   1.352   1.170
+    2ALA    HB3   14   1.120   1.440   1.079
+    2ALA      C   15   1.166   1.169   1.284
+    2ALA      O   16   1.055   1.145   1.330
+    3NME      N   17   1.263   1.077   1.279
+    3NME      H   18   1.356   1.102   1.247
+    3NME    CH3   19   1.246   0.942   1.335
+    3NME   HH31   20   1.142   0.911   1.345
+    3NME   HH32   21   1.293   0.868   1.270
+    3NME   HH33   22   1.292   0.934   1.434
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  15.00000
+   22
+    1ACE   HH31    1   1.508   1.417   1.043
+    1ACE    CH3    2   1.539   1.448   1.143
+    1ACE   HH32    3   1.565   1.553   1.146
+    1ACE   HH33    4   1.630   1.395   1.169
+    1ACE      C    5   1.438   1.410   1.253
+    1ACE      O    6   1.429   1.471   1.356
+    2ALA      N    7   1.348   1.327   1.217
+    2ALA      H    8   1.362   1.285   1.126
+    2ALA     CA    9   1.233   1.289   1.298
+    2ALA     HA   10   1.269   1.295   1.401
+    2ALA     CB   11   1.107   1.379   1.286
+    2ALA    HB1   12   1.057   1.360   1.191
+    2ALA    HB2   13   1.034   1.369   1.366
+    2ALA    HB3   14   1.141   1.482   1.282
+    2ALA      C   15   1.192   1.141   1.266
+    2ALA      O   16   1.220   1.089   1.156
+    3NME      N   17   1.131   1.075   1.364
+    3NME      H   18   1.117   1.126   1.450
+    3NME    CH3   19   1.090   0.931   1.376
+    3NME   HH31   20   1.156   0.880   1.446
+    3NME   HH32   21   0.996   0.929   1.431
+    3NME   HH33   22   1.090   0.874   1.283
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  16.00000
+   22
+    1ACE   HH31    1   1.587   1.386   1.166
+    1ACE    CH3    2   1.554   1.425   1.262
+    1ACE   HH32    3   1.559   1.534   1.266
+    1ACE   HH33    4   1.627   1.390   1.335
+    1ACE      C    5   1.417   1.372   1.300
+    1ACE      O    6   1.390   1.364   1.421
+    2ALA      N    7   1.334   1.344   1.205
+    2ALA      H    8   1.365   1.367   1.111
+    2ALA     CA    9   1.190   1.324   1.235
+    2ALA     HA   10   1.160   1.386   1.319
+    2ALA     CB   11   1.105   1.363   1.112
+    2ALA    HB1   12   1.129   1.297   1.029
+    2ALA    HB2   13   0.999   1.351   1.131
+    2ALA    HB3   14   1.125   1.467   1.088
+    2ALA      C   15   1.162   1.180   1.282
+    2ALA      O   16   1.054   1.128   1.258
+    3NME      N   17   1.261   1.106   1.318
+    3NME      H   18   1.349   1.152   1.336
+    3NME    CH3   19   1.250   0.960   1.324
+    3NME   HH31   20   1.264   0.908   1.229
+    3NME   HH32   21   1.322   0.926   1.399
+    3NME   HH33   22   1.153   0.923   1.357
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  17.00000
+   22
+    1ACE   HH31    1   1.614   1.382   1.173
+    1ACE    CH3    2   1.559   1.432   1.253
+    1ACE   HH32    3   1.547   1.532   1.213
+    1ACE   HH33    4   1.625   1.444   1.339
+    1ACE      C    5   1.426   1.380   1.288
+    1ACE      O    6   1.411   1.348   1.406
+    2ALA      N    7   1.325   1.370   1.200
+    2ALA      H    8   1.341   1.388   1.102
+    2ALA     CA    9   1.189   1.318   1.239
+    2ALA     HA   10   1.170   1.358   1.338
+    2ALA     CB   11   1.084   1.388   1.158
+    2ALA    HB1   12   1.113   1.369   1.055
+    2ALA    HB2   13   0.994   1.336   1.189
+    2ALA    HB3   14   1.075   1.493   1.186
+    2ALA      C   15   1.170   1.163   1.252
+    2ALA      O   16   1.097   1.103   1.171
+    3NME      N   17   1.235   1.099   1.349
+    3NME      H   18   1.305   1.153   1.399
+    3NME    CH3   19   1.220   0.964   1.393
+    3NME   HH31   20   1.315   0.912   1.406
+    3NME   HH32   21   1.168   0.953   1.489
+    3NME   HH33   22   1.178   0.897   1.318
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  18.00000
+   22
+    1ACE   HH31    1   1.555   1.419   1.126
+    1ACE    CH3    2   1.554   1.431   1.235
+    1ACE   HH32    3   1.554   1.535   1.267
+    1ACE   HH33    4   1.638   1.381   1.283
+    1ACE      C    5   1.423   1.378   1.292
+    1ACE      O    6   1.416   1.347   1.409
+    2ALA      N    7   1.322   1.351   1.211
+    2ALA      H    8   1.350   1.370   1.115
+    2ALA     CA    9   1.185   1.318   1.251
+    2ALA     HA   10   1.171   1.358   1.352
+    2ALA     CB   11   1.091   1.400   1.149
+    2ALA    HB1   12   1.059   1.346   1.060
+    2ALA    HB2   13   1.008   1.443   1.205
+    2ALA    HB3   14   1.154   1.480   1.109
+    2ALA      C   15   1.163   1.166   1.255
+    2ALA      O   16   1.060   1.108   1.214
+    3NME      N   17   1.255   1.102   1.328
+    3NME      H   18   1.335   1.159   1.354
+    3NME    CH3   19   1.246   0.964   1.369
+    3NME   HH31   20   1.154   0.913   1.344
+    3NME   HH32   21   1.323   0.899   1.327
+    3NME   HH33   22   1.251   0.953   1.477
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  19.00000
+   22
+    1ACE   HH31    1   1.559   1.326   1.133
+    1ACE    CH3    2   1.552   1.413   1.200
+    1ACE   HH32    3   1.538   1.504   1.142
+    1ACE   HH33    4   1.631   1.428   1.273
+    1ACE      C    5   1.426   1.375   1.280
+    1ACE      O    6   1.428   1.345   1.398
+    2ALA      N    7   1.319   1.363   1.209
+    2ALA      H    8   1.322   1.382   1.110
+    2ALA     CA    9   1.191   1.329   1.265
+    2ALA     HA   10   1.186   1.364   1.368
+    2ALA     CB   11   1.075   1.391   1.178
+    2ALA    HB1   12   1.098   1.383   1.072
+    2ALA    HB2   13   0.980   1.348   1.211
+    2ALA    HB3   14   1.066   1.499   1.192
+    2ALA      C   15   1.169   1.174   1.275
+    2ALA      O   16   1.086   1.118   1.203
+    3NME      N   17   1.233   1.105   1.363
+    3NME      H   18   1.312   1.150   1.405
+    3NME    CH3   19   1.242   0.956   1.350
+    3NME   HH31   20   1.219   0.899   1.440
+    3NME   HH32   21   1.165   0.918   1.284
+    3NME   HH33   22   1.333   0.921   1.301
+  10.00000  10.00000  10.00000
+Generated by trjconv : Gromacs Runs One Microsecond At Cannonball Speeds t=  20.00000
+   22
+    1ACE   HH31    1   1.622   1.459   1.287
+    1ACE    CH3    2   1.546   1.434   1.214
+    1ACE   HH32    3   1.578   1.365   1.135
+    1ACE   HH33    4   1.509   1.531   1.180
+    1ACE      C    5   1.430   1.358   1.283
+    1ACE      O    6   1.444   1.310   1.394
+    2ALA      N    7   1.315   1.370   1.215
+    2ALA      H    8   1.323   1.427   1.132
+    2ALA     CA    9   1.178   1.334   1.258
+    2ALA     HA   10   1.163   1.376   1.357
+    2ALA     CB   11   1.079   1.393   1.158
+    2ALA    HB1   12   1.096   1.348   1.060
+    2ALA    HB2   13   0.979   1.367   1.192
+    2ALA    HB3   14   1.087   1.501   1.165
+    2ALA      C   15   1.163   1.177   1.270
+    2ALA      O   16   1.073   1.114   1.216
+    3NME      N   17   1.249   1.110   1.347
+    3NME      H   18   1.324   1.165   1.388
+    3NME    CH3   19   1.242   0.964   1.360
+    3NME   HH31   20   1.325   0.920   1.416
+    3NME   HH32   21   1.151   0.938   1.414
+    3NME   HH33   22   1.221   0.913   1.266
+  10.00000  10.00000  10.00000
diff --git a/regtest/ttsketch/rt-ttmetad/ttsketch.h5 b/regtest/ttsketch/rt-ttmetad/ttsketch.h5
new file mode 100644
index 0000000000000000000000000000000000000000..03832f1a8df0ba560f4fc1aa705bbcd32b44e089
GIT binary patch
literal 66008
zcmeHQ3v?9K8NLakTLQ-wt&LXg)(6B#b(12dD(*@Fg9@%fPa{@MDuF~p2oN<`J!h>V
z3)Z>^j~c8V!B(5L_K-fTv04+w)U~HAsZz;N!C0#T$4FWqP(4a#_W!>f_GWe_j~&?A
zxto*y=id9@nYrKm_kY~E_wK5SX*0%+IeLuHKR%y0NcfY7_}a^#o`Qm87SbrM;sYub
zBF5>~LOLiA`kxTRoPQXfU*?;8+4Si`<HY$vxti!5O-Jc>hf+oJ9f6AJ=STE5R&qlj
zPYISYw7;q8>Qen!qh&gUT!!_;Fg^uZkJwj!{VcyFhtSEQd`>(0ZTAtQl3xbQV?>tU
zAh$opdJS;Y*EB3_suf~secd8Gub%0w$eb%muM|S3Wg3NCm!M(>I%oDJ(+WhPVDS@y
zQN)P)6`Zb>^sE22biQ&<SFXr#p*+uq-IDPdE9eOnh|wCxKGC$iu||jDaIK>sq*36k
zJ+~x6V+B3=0*!a9_J2do(q(mv8-!nbK=h2lNj}sw=)rU@&d}@YAC!;M^pbuopGUtd
zJ-@K9Fv;p;OFXJTkj>}w=;zWu<M%ml;Zt38Ld2;c$G%CfM~eX8SKtUbNH$VCtl)I*
zk(5tQNr;w<()l_$-8+l&2~{ClS-(^6z;D<e|KS76_RZ16fIj1KkXsbylhr`#=8*r#
zon7l!T=?V5p{9UuijfZ*rj(vGr>?0+M0c;Oej<AJ7@Qzeg?g>R#dQtWiRRY|B4Wed
zOa2r*qiEu|@KMhOn<uVHgkq0Be8bH%4)64xx$C}1Rvy}U(BH3l@|ZQ-Iz;(h+aB65
zwWIf(Ni~%lukR3leqzmA#Y;NEYbWmh;>mbN^qEoD-qjZi-FU`HU*H~wd{1T3(z93p
z`oe4NTU|!2=0@yn+f_TYxhquv_DhqCO1nex*Pots=v6O-eqKIy{+5&82~D5+>7!Nt
zjiD(kS3k4is24lhUhnO^^W8f_C+;|P?yQc!(E4pRZ8_%tu8@D@+NTn?z8tzfcJd!&
zq*YoWpb$_9C<GJ&BS)Y>WZSRwx!SMHJ|2FT<6%Dj(aU~ylts{P{3CEQF{b?Ck&CW6
zgNqQ5$3GB1#aNEel)8rb%k<-AO^cUSFRV$$w_z|0k*4ByDIT<n+w~X_Vav?9H4V!a
zFV$l8;l}Oe{ipV;)Q}jr$FBc~V~G**DOXd(BM8X-C&2Aa9F-0L#t*fARMV^tg%d4H
z$#Z!+yKPDyKdkx+RYCuVak`c71NuENC7rLI)BdpSCG(O2dPv60&?^JS55J@#0i0**
z&vQ;q6TmU*9q4hyVpsb&B;WezDSaxABMRIbIUNsBKJ1q7^mM);rz2!0QlqN<tmlb4
zf4|{310TF6^L?G#9Ob6l*}0bEiLr$F74a4VvYq4ZB-#S$IMmIj^n7|vCg#^IO=dLK
z*W92VG*vH5`jP0zcJc&yZT|+HH$p`7`>3Yb`<;U|9-Kd^pS85(dRe;$`G8^VNF?7n
z>Gv@3{Z1ec^z!JGUhX|1+1!j@&^!GnW@Jeg5O&%;71m3cxC1DdfCF<1x$;k>a>zqD
zrz=l3ueZN)p_d`3BY$sL;%d3dJ|01ZhbGL%d*yPM<6$1JcXL0Ea>upvW2#OiM);Ln
zO%abEAp0?}PtB4&$mFkthlvy7OxgZe`7!!b2iUE-)eDnRrsIB$W{3yp$An$2Pvr5F
z$SKwj7W$&~G;2PumD<~Fn&+g?yVA!^GS7ReV0qHHjL*P%Pc24a4j!SO%i|M9zb(@G
zmrpEftiGn^YRpR0=A~(E*>_&r%mx}#0>02wh=&qEYJ8M2u(NxW$yv98qb9DTh-VOx
z?H-GdXv6^{;Z*$I%;U@vzK)gM%kB96(mb$xM*P0+o7P<bdZ%rYH6P;lisUT+%)9|E
z>O3<uk6wcKy?Zk8!4KNQ>Edr$*LS)UdKnJd$m93RT`gDH$D>-|p$V}Ozt_1O5A*mv
z&iy#b9oEi|37kWWe&$!iTL{R04D3_6WDlH*-(~w_<;Uc9{JzB1`b3W3{byVE1L%v|
zbFKNjR%&mz2OGaHXL-tx_+6eiF#P!4-0sD~Bxx}gI9t0nl^Ee?b2UXgf`DxIpwEKg
zbO7d8H&yF7h?gqTi-(-Tc$Pd*%;*mT=SLm7&-JHi(==;-Fr$8Pb<^ZYw~XXH{2|mP
zZdd`Ymn`7@4_NtN@uV1~mFujB1wyIu>`6sp(q3Lrml}s0Me8q%L?Fq}r_+vgy;mUS
z*DR>7)|Stq-=+NV!ou2b6Po&^QEakK6C)nPoUL!Ne~3D@^`+-$>&)IdkFIz1=Zt+V
zK(ELE!w(^7T#jT)05a55*$M%LfI>hapb$_9xEFy;`wmskrZz>nX?F2k%caB!Kc1^8
z;t>Socn;&3g5?M>XzPR=S$8MzuVOWhsr$AZb_u7}-Su8ZEsk-+?fOsiTw+vyHv{G|
zBKMyFw?E8!6>vDc?yiUXEBc4HgsvQr^n3Vv$_HO9r(-wbPQ67jt|a4SHLj$OECavX
zU3a&YhXm3!)4+Lb@tl0%^18du@6c@m_wAhaRZ>3eRy4W0n(=&EIh|NYd_bdiPEIyE
z>o<k>D!>Qx_M(hil&@%K=lt`Cu_Wtvs7~kx1Y|oG<eM&GIYJwpnzt9^evDCUD?c%}
z=k2|d2YT6<w^x~IfSLsYdZ+e#kPveUy(}&_FGp^G51Ow;jyZ*%id{wJup4ch?yfSg
zx4&|sm*Gg@4GQja==*i{xmvEWkH^Of4^5a;?+FV&cR3#B^A{?rsmDgS<J$Q#J>Mrr
zHQvR5d5p+@4D3^XRXPw(&D)dhkCh*j+w=BD7ZGA9%biTRE50=LJx{P6NtXg61$`0V
zeI<+%Zy(v)?ZM958^`jLAM^I)@d;x*DLd=#!nGuKQ5HWtyVqSujPQ%Onj#)SK(>2`
z-?#I)7+`QJe(&d<$=n~mADIVs&xqfPufvl!r|^$T>dniM3ooFP^%C-B8sLob<gb^=
zrA8Whs-l7N5EnOcI^JmT9*84*yEoL|+c?hEa+Q5NCMY~K!8YRe(_M~-dHn8MLcKT2
zo!HKgX`yZ0Qu}^!HAOsvfb7S>KD9~qz^V9Mwm<HT-zU3TpUCU(Vys6pbpt;a^d@I9
zW#aN)zP;TZZ2W#6%Ts>D@AABX;m7ahc26{uti@R5Z0+81VuYXVJxTR;i)Fh9eby??
z+j45%ojgy>=ntM<ch`FpiKPBsE!8atj(<lS5nOrQU5xdKBNxl#lj?eRDC_PnU@Hem
z_@$~L04PvYA)pXY2q**;0t$h=K_Jt*yY3a##3;AaE}pAeNsRF0xtby#K|qe@g52(A
zmLtG`bwf+*>4!|}nQFfme?_S_2Ue$L8pZoDyf;IAnPR<5`E68E$=9<Q$J}>6;N1J2
zdjFl&l2izmOb(30;xZIDk)}A#_?`O1YPu~vPyaoX4&F-?&$sF?>3lIxxBiv#fkxyG
zN@x1U_<8%POdOOCJ=Ges<~vZO_T!jCeIHxwYPrfj9s!r*VLlGq%Ka)jNWm>+KHk15
zRSWs^KHd7yNuyqTbu^ctAEy}kpkYesX>;nd4V|WaxOmY8Hy<Z(f=m_awF(#4HC!i}
zUn_`+4SW0dt{wN_#a|0IJ-6$NSbTHGJv~jgmaRCU^X>`vuX*9sV>(+-I{1U<&fU==
zddAH@V!<t;_ycQi{9ygM4$)e=`SyDkc8D>n-q^hRme8J}u9uFO_gJWF)5(wG9)^5R
zcljP$Tr{z7n|-UxsMXwv$a{(B7LD%;?Y(p2%--8Kh5Ftso_tor#?aRB4=yOLe<!qa
zYxtuRFWMNoY|;Bq&k7|v633r)?4~2<hN6l7&u*{yAawqg1)qH~r7JYAr+>$y=U)k}
zU42TQjI>HC1QY@a0fm4<VB`qc#r4};=o;{sM}9?h1~(xf$MuM>BJ5|PEl%yHBgczY
zalIZR!rgRgKb_zM)MA8<cKs*u6XHkw3cM8c1_E;b32?LHtXF{o#>KRL<jC*Xm)u8J
zLjQ<xx>3^a@n_Qc+Bx0xEbi1>6yu9BURLu(^ciEoId}KdDRK4qhrFMT|DklbE$4LQ
z!;}xZ)%|!n-yTjEZ=ihW>2}uhl$*cb7*_*6*iWaMaf@=(?d)9DdSWceU#L{eED(_G
zT<ZM~%Mq$NwH`0Z{W!uGvhovidq16E9_VFbJznc0c=F~HdZ+!T=H<wRUXH9ojyVl*
zcH(E|<;aDe>UorMV6Vkvln%6l^T1H(WjGQe-%qF9)pC`6Ji-bOO|*^mcoi<k!~A_%
z3-{wFcVjz0rsS8zsK&b(Fpm-0kAZy(OZLF2^?0)VvGQYbdq16stM!Sz9<TTp)*S)*
zqVZSOd|oTHx7&kVk5|d^lppKy<nak(JSjW->BM<FBgz72XZHfXAx8McUXxR=w^z1%
zh~LXuj)0F-@q3uZncW}1SLcD<GvfE!-{Q%eQ}{>CzcVjKu8$A=ZOAdF0nU#7-n<;S
z&{Hi>P!8hvPEPke3C;sU?d{%B=Wo`zTCTE>#}b8yCfY{)zTD+_n8)vx?bLsx+>P!0
znC(vyBm8l$rie!nko_3gC&6+A7@UgVW&7ja`2AK_>k~PC?~$HA<Ny@<BKS0N6x9*1
zx7&k_-&e6b<wyK3&l?zi{BCadA{``uF&0BxyVpsK@Uyv^A|63NwtLWLd!%_=PVJ{7
z&l5BHgJ<{C2|iD<H%23=ZaHurXT%W+CHsfB#zOS4K5^t?d3;h`?+#@@oi%Lb0Ezza
z^-oFwD5<^`0tx|zfI>hapb*F-1TyWX6Zji7GRkeWi|4Aki4lH0S5w3z2*~jq#xWDT
z9|~Y_YTcc@|BBT(rtaI~{hL$k?ppsrEsk-+?fOsn6=GC=Hv{G|BKMyFx8KKl6>vDc
z?rt6Hee{n6r?+pSJAi)oeMD*S1vwr01b6B!igkA~URL8u?pSx%mj~}v^k4I(Pi!+$
z$ZZ>^yI-Yz*sY3xr}H&(I{q%@Lr(`^rgXl4<Gl*-!MeMOE!6TT17K(8dU}Ylg!vWm
z76P)J3v%=Ox&HwTr{?YLd7G{f;R{*$iMhS*?&Cbr%f`IDz-xH&<`jA-__}#Ha-o+K
zte224(*S3L-;hhv)6i3qt&|VD(ah<VH`Dp|cM5tL0S(@u;68``JNBQuTCTE>$LJr?
zGlgVPYT$dZ`2K$MgSuY!@fhcFJj~}W_<IS?D5GlU$E<sc7~xlPHAOsvfb7S>KDA5s
zz^QqAvi-60V{&`l-IrahPvm)f@m}i=0DZCjU)FqHE48;%gPpf`EXz}V%-fU4Cyeo=
z?5w-<y+@K3Wr4G^doA0E5q>dOQ^X?($aXKt?QY|7F~Hzd{NDZ{T_M63va)-*9luY=
z1G{I$@4Y+l<jpDkqyC-d<;X>p6=S`Ge3=H2@jedBDfE<|_a#SMT+ZprUFP-nSFXL?
z8|u2d(_JlB*~eqD!b1~gW8K|(F2}<>e(&af9OaH{=f_n2hZy0Hb2UXgf`IJDz&<rg
z_Q0w5UA8|~eoSu1?=xJjPvrPL(r?`Xpf6hgYt84iQhU2S*!X=m%Ts>D@AABX;m7ah
zb}zy6ZDK5Nwsvn1F~ZOGnw)yQy|UedK5LZbZ8^2>PM#-b^aszbyKDU)31v>KyNm4h
ndJh?1`*poLly!ICWh;lPsdaZiLQ#c)LO>y)5Ksu@F9QDunyK!i

literal 0
HcmV?d00001

diff --git a/src/.gitignore b/src/.gitignore
index 5a6cbc3d5b..af37f45919 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -69,6 +69,7 @@
 !/metatensor
 !/metatomic
 !/sizeshape
+!/ttsketch
 
 # And just ignore these files
 *.xxd
diff --git a/src/ttsketch/.gitignore b/src/ttsketch/.gitignore
new file mode 100644
index 0000000000..efe518894f
--- /dev/null
+++ b/src/ttsketch/.gitignore
@@ -0,0 +1,11 @@
+/*
+# in this directory, only accept source, Makefile and README
+!/.gitignore
+!/*.c
+!/*.cpp
+!/*.h
+!/Makefile
+!/README
+!/module.type
+!/COPYRIGHT
+!/options.mk
diff --git a/src/ttsketch/BasisFunc.cpp b/src/ttsketch/BasisFunc.cpp
new file mode 100644
index 0000000000..aeca9b20dc
--- /dev/null
+++ b/src/ttsketch/BasisFunc.cpp
@@ -0,0 +1,301 @@
+#include "BasisFunc.h"
+
+using namespace std;
+
+namespace PLMD {
+namespace ttsketch {
+
+BasisFunc::BasisFunc()
+  : dom_(make_pair(0.0, 0.0)), nbasis_(0), L_(0.0), shift_(0.0), w_(0.0), kernel_(false), dx_(0.0) {}
+
+BasisFunc::BasisFunc(pair<double, double> dom, int nbasis, double w, bool kernel, double dx)
+  : dom_(dom), nbasis_(nbasis), L_((dom.second - dom.first) / 2), shift_((dom.second + dom.first) / 2), w_(w), kernel_(kernel), dx_(dx)
+{
+  if(kernel) {
+    double spacing = (dom.second - dom.first) / (nbasis - 1);
+    if(dx == 0) {
+      // default kernel width equals the grid spacing
+      this->dx_ = spacing;
+    }
+    // nbasis-1 kernel centers, uniformly spaced starting at dom.first
+    this->centers_ = vector<double>(nbasis - 1);
+    for(int i = 0; i < nbasis - 1; ++i) {
+      this->centers_[i] = dom.first + i * spacing;
+    }
+    // Compute the Gram matrix G where G(i,j) = int phi_i(x)*phi_j(x) dx,
+    // integrating the periodized Gaussian kernels analytically over the domain.
+    // phi_1 = 1 (constant), phi_{i+1} = sum_{k=-1,0,1} exp(-(x-c_i+2kL)^2/(2*dx^2)).
+    // The pseudo-inverse of G is stored in ginv_ and applied after sketching to
+    // recover dual basis coefficients from the Gram representation.
+    this->gram_ = Matrix<double>(nbasis, nbasis);
+    // G(0,0) = int 1*1 dx = domain length
+    this->gram_(0, 0) = this->dom_.second - this->dom_.first;
+    for(int i = 1; i < nbasis; ++i) {
+      // G(0,i) = G(i,0) = int phi_{i+1}(x) dx (integral of a periodized Gaussian)
+      this->gram_(i, 0) = this->gram_(0, i) = this->dx_ * sqrt(M_PI / 2) *
+                                (erf((this->dom_.second -
+                                2 * this->dom_.first + this->centers_[i - 1]) /
+                                (sqrt(2) * this->dx_)) -
+                                erf((this->dom_.first - 2 * this->dom_.second +
+                                this->centers_[i - 1]) /
+                                (sqrt(2) * this->dx_)));
+      // G(i,j) = int phi_{i+1}(x)*phi_{j+1}(x) dx; the double sum over k,l
+      // accounts for cross-terms between periodic images of each kernel.
+      for(int j = i; j < nbasis; ++j) {
+        double result = 0.0;
+        for(int k = -1; k <= 1; ++k) {
+          for(int l = -1; l <= 1; ++l) {
+            result += this->dx_ / 2 * exp(-pow((this->dom_.first -
+                      this->dom_.second) * (k - l) + this->centers_[i - 1] -
+                      this->centers_[j - 1], 2) / (4 * pow(this->dx_, 2))) *
+                      sqrt(M_PI) * (erf((this->dom_.first * (k + l - 2) -
+                      this->dom_.second * (k + l) + this->centers_[i - 1] +
+                      this->centers_[j - 1]) / (2 * this->dx_)) -
+                      erf((this->dom_.first * (k + l) - this->dom_.second *
+                      (k + l + 2) + this->centers_[i - 1] +
+                      this->centers_[j - 1]) / (2 * this->dx_)));
+          }
+        }
+        this->gram_(i, j) = this->gram_(j, i) = result;
+      }
+    }
+    pseudoInvert(this->gram_, this->ginv_);
+  }
+}
+
+// Orthonormal Fourier basis (not convolved):
+//   pos=1: phi_1(x) = 1/sqrt(2L)
+//   pos even: phi_pos(x) = 1/sqrt(L) * cos(pi*(x-shift)*(pos/2)/L)
+//   pos odd:  phi_pos(x) = 1/sqrt(L) * sin(pi*(x-shift)*(pos/2)/L)
+double BasisFunc::fourier(double x, int pos) const {
+  if(pos == 1) {
+    return 1 / sqrt(2 * this->L_);
+  } else if(pos % 2 == 0) {
+    return sqrt(1 / this->L_) * cos(M_PI * (x - this->shift_) * (pos / 2) / this->L_);
+  } else {
+    return sqrt(1 / this->L_) * sin(M_PI * (x - this->shift_) * (pos / 2) / this->L_);
+  }
+}
+
+// Periodized Gaussian kernel basis:
+//   pos=1: phi_1(x) = 1 (constant)
+//   pos>1: phi_pos(x) = sum_{k=-1,0,1} exp(-(x - c_{pos-1} + 2kL)^2 / (2*dx^2))
+// The k=-1,0,1 sum enforces periodicity across domain boundaries.
+double BasisFunc::gaussian(double x, int pos) const {
+  if(pos == 1) {
+    return 1.0;
+  } else {
+    double result = 0.0;
+    for(int k = -1; k <= 1; ++k) {
+      result += exp(-pow(x - this->centers_[pos - 2] + 2 * k * this->L_, 2) / (2 * pow(this->dx_, 2)));
+    }
+    return result;
+  }
+}
+
+double BasisFunc::fourierd(double x, int pos) const {
+  if(pos == 1) {
+    return 0.0;
+  } else if(pos % 2 == 0) {
+    return -pow(1 / this->L_, 1.5) * M_PI * (pos / 2) * sin(M_PI * (x - this->shift_) * (pos / 2) / this->L_);
+  } else {
+    return pow(1 / this->L_, 1.5) * M_PI * (pos / 2) * cos(M_PI * (x - this->shift_) * (pos / 2) / this->L_);
+  }
+}
+
+double BasisFunc::gaussiand(double x, int pos) const {
+  if(pos == 1) {
+    return 0.0;
+  } else {
+    double result = 0.0;
+    for(int k = -1; k <= 1; ++k) {
+      result += (this->centers_[pos - 2] - x - 2 * k * this->L_) /
+                pow(this->dx_, 2) * exp(-pow(x - this->centers_[pos - 2] +
+                2 * k * this->L_, 2) / (2 * pow(this->dx_, 2)));
+    }
+    return result;
+  }
+}
+
+// Evaluate basis function phi_pos(x), optionally in convolution (conv) mode.
+// Conv mode for Fourier: multiplies each harmonic by exp(-(pi*w*k)^2/(2L^2)),
+//   which is equivalent to convolving the basis function with a Gaussian of width w.
+// Conv mode for kernel: widens each kernel analytically to std dev sqrt(dx^2+w^2),
+//   equivalent to convolving the Gaussian kernel with a Gaussian of width w.
+// In both cases the constant basis function (pos=1) is unchanged by convolution.
+double BasisFunc::operator()(double x, int pos, bool conv) const {
+  if(this->kernel_) {
+    if(conv) {
+      if(pos == 1) {
+        return 1.0;
+      } else {
+        // Gaussian kernel convolved with Gaussian of width w_: std dev becomes sqrt(dx^2+w^2)
+        double result = 0.0;
+        for(int k = -1; k <= 1; ++k) {
+          result += exp(-pow(x - this->centers_[pos - 2] +
+                    2 * k * this->L_, 2) / (2 * (pow(this->dx_, 2) +
+                    pow(this->w_, 2)))) / (sqrt(1 / pow(this->dx_, 2) + 1 /
+                    pow(this->w_, 2)) * this->w_);
+        }
+        return result;
+      }
+    } else {
+      return gaussian(x, pos);
+    }
+  } else {
+    if(conv) {
+      if(pos == 1) {
+        return 1 / sqrt(2 * this->L_);
+      } else {
+        // Fourier convolution theorem: multiplying by exp(-(pi*w*k)^2/(2L^2))
+        // is equivalent to convolving phi_pos with a Gaussian of width w_
+        return exp(-pow(M_PI * this->w_ * (pos / 2), 2) / (2 * pow(this->L_, 2))) * fourier(x, pos);
+      }
+    } else {
+      return fourier(x, pos);
+    }
+  }
+}
+
+double BasisFunc::grad(double x, int pos, bool conv) const {
+  if(this->kernel_) {
+    if(conv) {
+      if(pos == 1) {
+        return 0.0;
+      } else {
+        double result = 0.0;
+        for(int k = -1; k <= 1; ++k) {
+          result += pow(this->dx_, 2) * exp(-pow(x - this->centers_[pos - 2] +
+                    2 * k * this->L_, 2) / (2 * (pow(this->dx_, 2) +
+                    pow(this->w_, 2)))) * (this->centers_[pos - 2] -
+                    2 * k * this->L_ - x) * sqrt(1 / pow(this->dx_, 2) +
+                    1 / pow(this->w_, 2)) * this->w_ / pow(pow(this->dx_, 2) +
+                    pow(this->w_, 2), 2);
+        }
+        return result;
+      }
+    } else {
+      return gaussiand(x, pos);
+    }
+  } else {
+    if(conv) {
+      if(pos == 1) {
+        return 0.0;
+      } else {
+        return exp(-pow(M_PI * this->w_ * (pos / 2), 2) / (2 * pow(this->L_, 2))) * fourierd(x, pos);
+      }
+    } else {
+      return fourierd(x, pos);
+    }
+  }
+}
+
+// Compute int_a^b phi_pos(x) dx analytically.
+// Used by covMat() to evaluate the partition function Z = <1> under the TT distribution.
+double BasisFunc::int0(int pos) const {
+  if(this->kernel_) {
+    if(pos == 1) {
+      return this->dom_.second - this->dom_.first;
+    } else {
+      return -this->dx_ * sqrt(M_PI / 2) * (erf((2 * this->dom_.first -
+             this->dom_.second - this->centers_[pos - 2]) /
+             (sqrt(2) * this->dx_)) + erf((this->dom_.first -
+             2 * this->dom_.second + this->centers_[pos - 2]) /
+             (sqrt(2) * this->dx_)));
+    }
+  } else {
+    if(pos == 1) {
+      return sqrt(2 * this->L_);
+    } else if(pos % 2 == 0) {
+      return 2 * sqrt(this->L_) / (M_PI * (pos / 2)) * sin(M_PI * (pos / 2));
+    } else {
+      return 0.0;
+    }
+  }
+}
+
+// Compute int_a^b x*phi_pos(x) dx analytically.
+// Used by covMat() to evaluate E[x] = (1/Z) * <x> under the TT distribution.
+double BasisFunc::int1(int pos) const {
+  if(this->kernel_) {
+    if(pos == 1) {
+      return (pow(this->dom_.second, 2) - pow(this->dom_.first, 2)) / 2;
+    } else {
+      double a = this->dom_.first;
+      double b = this->dom_.second;
+      double c = this->centers_[pos - 2];
+      double dx_sq = pow(this->dx_, 2);
+      double sqrt2pi = sqrt(2 * M_PI);
+      double term1 = exp(-pow(b - 2 * a + c, 2) / (2 * dx_sq)) -
+                    exp(-pow(a - 2 * b + c, 2) / (2 * dx_sq));
+      double term2 = (b - a + c) * sqrt2pi * erf((a - c) / (sqrt(2) * this->dx_));
+      double term3 = (a - b - c) * sqrt2pi * erf((2 * a - b - c) / (sqrt(2) * this->dx_));
+      double term4 = sqrt2pi * (c * erf((-a + c) / (sqrt(2) * this->dx_)) -
+                      (a - b + c) * erf((a - 2 * b + c) / (sqrt(2) * this->dx_)) +
+                      (a - b) * erf((-b + c) / (sqrt(2) * this->dx_)));
+      return this->dx_ / 2 * (2 * this->dx_ * term1 + term2 + term3 + term4);
+    }
+  } else {
+    if(pos == 1) {
+      return this->shift_ * sqrt(2 * this->L_);
+    } else if(pos % 2 == 0) {
+      return 2 * this->shift_ * sqrt(this->L_) / (M_PI * (pos / 2)) * sin(M_PI * (pos / 2));
+    } else {
+      return 2 * pow(this->L_, 1.5) / pow(M_PI * (pos / 2), 2) * (sin(M_PI * (pos / 2)) - M_PI * (pos / 2) * cos(M_PI * (pos / 2)));
+    }
+  }
+}
+
+// Compute int_a^b x^2*phi_pos(x) dx analytically.
+// Used by covMat() to evaluate E[x^2] for variance: Var[x] = E[x^2] - E[x]^2.
+double BasisFunc::int2(int pos) const {
+  if(this->kernel_) {
+    if(pos == 1) {
+      return (pow(this->dom_.second, 3) - pow(this->dom_.first, 3)) / 3;
+    } else {
+      double a = this->dom_.first;
+      double b = this->dom_.second;
+      double c = this->centers_[pos - 2];
+      double dx_sq = pow(this->dx_, 2);
+      double sqrt2pi = sqrt(2 * M_PI);
+      double sqrt2_dx = sqrt(2) * this->dx_;
+      double exp1 = exp(-pow(a - c, 2) / (2 * dx_sq));
+      double exp2 = exp(-pow(b - c, 2) / (2 * dx_sq));
+      double exp3 = exp(-pow(a - 2 * b + c, 2) / (2 * dx_sq));
+      double exp4 = exp(-pow(b - 2 * a + c, 2) / (2 * dx_sq));
+      double term1 = 2 * this->dx_ * (
+          a * (2 * exp1 + 2 * exp2 - exp3) +
+          b * (exp4 - 2 * exp1 - 2 * exp2) +
+          c * (exp4 - exp3)
+      );
+      double diff1_sq = pow(b - a + c, 2) + dx_sq;
+      double diff2_sq = pow(a - b + c, 2) + dx_sq;
+      double c_sq = pow(c, 2) + dx_sq;
+      double erf1 = erf((a - c) / sqrt2_dx);
+      double erf2 = erf((2 * a - b - c) / sqrt2_dx);
+      double erf3 = erf((c - a) / sqrt2_dx);
+      double erf4 = erf((a - 2 * b + c) / sqrt2_dx);
+      double erf5 = erf((c - b) / sqrt2_dx);
+      double term2 = diff1_sq * sqrt2pi * erf1 - diff1_sq * sqrt2pi * erf2;
+      double term3 = sqrt2pi * (
+          c_sq * erf3 - diff2_sq * erf4 +
+          (a - b) * (a - b + 2 * c) * erf5
+      );
+      return this->dx_ / 2 * (term1 + term2 + term3);
+    }
+  } else {
+    if(pos == 1) {
+      return sqrt(2 * this->L_) / 3 * (3 * pow(this->shift_, 2) + pow(this->L_, 2));
+    } else if(pos % 2 == 0) {
+      return 2 * sqrt(this->L_) / pow(M_PI * (pos / 2), 3) *
+            (2 * pow(this->L_, 2) * M_PI * (pos / 2) * cos(M_PI * (pos / 2)) +
+            (pow(M_PI * (pos / 2), 2) * (pow(this->shift_, 2) +
+            pow(this->L_, 2)) - 2 * pow(this->L_, 2)) * sin(M_PI * (pos / 2)));
+    } else {
+      return 4 * this->shift_ * pow(this->L_, 1.5) / pow(M_PI * (pos / 2), 2) * (sin(M_PI * (pos / 2)) - M_PI * (pos / 2) * cos(M_PI * (pos / 2)));
+    }
+  }
+}
+
+}
+}
diff --git a/src/ttsketch/BasisFunc.h b/src/ttsketch/BasisFunc.h
new file mode 100644
index 0000000000..68559a87d2
--- /dev/null
+++ b/src/ttsketch/BasisFunc.h
@@ -0,0 +1,78 @@
+#ifndef __PLUMED_ttsketch_BasisFunc_h
+#define __PLUMED_ttsketch_BasisFunc_h
+#include "tools/Matrix.h"
+#include <utility>
+#include <vector>
+
+namespace PLMD {
+namespace ttsketch {
+
+// Represents a 1D basis expansion for one CV dimension, used to express the
+// bias potential as a tensor train over basis function indices. Two types are
+// supported:
+//
+//   Fourier (kernel_=false): orthonormal sinusoidal functions on [a, b].
+//     phi_1(x) = 1/sqrt(2L)
+//     phi_{2k}(x) = 1/sqrt(L) * cos(pi*(x-shift)*k/L)   (k = 1, 2, ...)
+//     phi_{2k+1}(x) = 1/sqrt(L) * sin(pi*(x-shift)*k/L) (k = 1, 2, ...)
+//
+//   Kernel (kernel_=true): periodized Gaussian kernels on a uniform grid,
+//     plus a constant function phi_1(x) = 1.
+//
+// "Conv" (convolution) mode pre-smooths basis functions by convolving with a
+// Gaussian of width w_. For Fourier bases this multiplies each harmonic by
+// exp(-(pi*w*k)^2 / (2L^2)); for kernel bases it widens each kernel
+// analytically. This yields smooth bias forces at no extra runtime cost.
+class BasisFunc {
+
+private:
+  std::pair<double, double> dom_; // CV domain [a, b]
+  int nbasis_;                    // number of basis functions n (including constant phi_1)
+  double L_;                      // domain half-length: L = (b-a)/2
+  double shift_;                  // domain midpoint: shift = (a+b)/2
+  double w_;                      // smoothing width for conv mode (Fourier: sigma in x-units; kernel: Gaussian hill width)
+  bool kernel_;                   // true = kernel basis, false = Fourier basis
+  double dx_;                     // kernel basis function width (std dev of each periodized Gaussian)
+  std::vector<double> centers_;   // centers of kernel basis functions (size nbasis-1, uniformly spaced)
+  Matrix<double> ginv_;           // pseudo-inverse of the Gram matrix (kernel basis only)
+  Matrix<double> gram_;           // Gram matrix: gram_(i,j) = int phi_i(x)*phi_j(x) dx (kernel basis only)
+
+public:
+  BasisFunc();
+  BasisFunc(std::pair<double, double> dom, int nbasis, double w, bool kernel, double dx);
+  // Evaluate Fourier basis function phi_pos(x) (1-indexed, not convolved).
+  double fourier(double x, int pos) const;
+  // Evaluate kernel basis function phi_pos(x) (1-indexed, not convolved).
+  // Uses periodic images (k=-1,0,1) so the function wraps smoothly across domain boundaries.
+  double gaussian(double x, int pos) const;
+  // Derivative of fourier(x, pos) w.r.t. x.
+  double fourierd(double x, int pos) const;
+  // Derivative of gaussian(x, pos) w.r.t. x.
+  double gaussiand(double x, int pos) const;
+  // Evaluate basis function phi_pos(x). If conv=true, applies Gaussian smoothing of width w_.
+  double operator()(double x, int pos, bool conv) const;
+  // Derivative of operator()(x, pos, conv) w.r.t. x.
+  double grad(double x, int pos, bool conv) const;
+  int nbasis() const { return this->nbasis_; }
+  const std::pair<double, double>& dom() const { return this->dom_; }
+  double w() const { return this->w_; }
+  // Integral of phi_pos over the domain: int_a^b phi_pos(x) dx.
+  // Used to compute the partition function Z in covMat().
+  double int0(int pos) const;
+  // Integral of x*phi_pos over the domain: int_a^b x*phi_pos(x) dx.
+  // Used to compute the marginal mean E[x] in covMat().
+  double int1(int pos) const;
+  // Integral of x^2*phi_pos over the domain: int_a^b x^2*phi_pos(x) dx.
+  // Used to compute E[x^2] for the marginal variance in covMat().
+  double int2(int pos) const;
+  bool kernel() const { return this->kernel_; }
+  const Matrix<double>& ginv() const { return this->ginv_; }
+  const Matrix<double>& gram() const { return this->gram_; }
+  double center(int pos) const { return this->kernel_ ? this->centers_[pos - 1] : 0.0; }
+  double dx() const { return this->dx_; }
+};
+
+}
+}
+
+#endif
diff --git a/src/ttsketch/COPYRIGHT b/src/ttsketch/COPYRIGHT
new file mode 100644
index 0000000000..b3b9fb53a4
--- /dev/null
+++ b/src/ttsketch/COPYRIGHT
@@ -0,0 +1,17 @@
+Copyright (c) 2026, Nils E. Strand, Siyao Yang, Yuehaw Khoo, and Aaron R. Dinner
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
diff --git a/src/ttsketch/Makefile b/src/ttsketch/Makefile
new file mode 100644
index 0000000000..e69a015a6c
--- /dev/null
+++ b/src/ttsketch/Makefile
@@ -0,0 +1,3 @@
+USE=core tools bias
+# generic makefile
+include ../maketools/make.module
diff --git a/src/ttsketch/TTHelper.cpp b/src/ttsketch/TTHelper.cpp
new file mode 100644
index 0000000000..8894207c3a
--- /dev/null
+++ b/src/ttsketch/TTHelper.cpp
@@ -0,0 +1,179 @@
+#include "TTHelper.h"
+
+using namespace std;
+using namespace itensor;
+
+namespace PLMD {
+namespace ttsketch {
+
+void ttWrite(const string& filename, const MPS& tt, unsigned count) {
+  // count starts at 2 on the first sketch call; open for writing on first call, append thereafter
+  auto f = count == 2 ? h5_open(filename, 'w') : h5_open(filename, 'a');
+  h5_write(f, "tt_" + to_string(count - 1), tt);
+}
+
+MPS ttRead(const string& filename, unsigned count) {
+  auto f = h5_open(filename, 'r');
+  auto tt = h5_read<MPS>(f, "tt_" + to_string(count - 1));
+  return tt;
+}
+
+void ttSumWrite(const string& filename, const MPS& tt, unsigned count) {
+  auto f = h5_open(filename, 'a');
+  h5_write(f, "vb_" + to_string(count - 1), tt);
+}
+
+MPS ttSumRead(const string& filename, unsigned count) {
+  auto f = h5_open(filename, 'r');
+  auto tt = h5_read<MPS>(f, "vb_" + to_string(count - 1));
+  return tt;
+}
+
+// Evaluate TT by contracting each core with its 1D basis vector phi(x_i):
+//   result = sum_{i_1,...,i_d} G_1[i_1] * ... * G_d[i_d] * phi_1(x_1,i_1) * ... * phi_d(x_d,i_d)
+// The contraction is performed left-to-right so intermediate results stay rank-1 scalars.
+double ttEval(const MPS& tt, const vector<BasisFunc>& basis, const vector<double>& elements, bool conv) {
+  int d = length(tt);
+  auto s = siteInds(tt);
+  vector<ITensor> basis_evals(d);
+  for(int i = 1; i <= d; ++i) {
+    basis_evals[i - 1] = ITensor(s(i));
+    for(int j = 1; j <= dim(s(i)); ++j) {
+      basis_evals[i - 1].set(s(i) = j, basis[i - 1](elements[i - 1], j, conv));
+    }
+  }
+  auto result = tt(1) * basis_evals[0];
+  for(int i = 2; i <= d; ++i) {
+    result *= tt(i) * basis_evals[i - 1];
+  }
+  return elt(result);
+}
+
+// Gradient of ttEval w.r.t. elements. For each dimension k, one TT contraction is performed
+// with the k-th basis vector replaced by its derivative d phi_k/dx_k (chain rule).
+// All basis evaluations are precomputed to avoid redundant work across the d contractions.
+vector<double> ttGrad(const MPS& tt, const vector<BasisFunc>& basis, const vector<double>& elements, bool conv) {
+  int d = length(tt);
+  auto s = siteInds(tt);
+  vector<double> grad(d, 0.0);
+  vector<ITensor> basis_evals(d), basisd_evals(d);
+  for(int i = 1; i <= d; ++i) {
+    basis_evals[i - 1] = basisd_evals[i - 1] = ITensor(s(i));
+    for(int j = 1; j <= dim(s(i)); ++j) {
+      basis_evals[i - 1].set(s(i) = j, basis[i - 1](elements[i - 1], j, conv));
+      basisd_evals[i - 1].set(s(i) = j, basis[i - 1].grad(elements[i - 1], j, conv));
+    }
+  }
+  for(int k = 1; k <= d; ++k) {
+    // replace dimension k's basis vector with its derivative, keep all others unchanged
+    auto result = tt(1) * (k == 1 ? basisd_evals[0] : basis_evals[0]);
+    for(int i = 2; i <= d; ++i) {
+      result *= tt(i) * (k == i ? basisd_evals[i - 1] : basis_evals[i - 1]);
+    }
+    grad[k - 1] = elt(result);
+  }
+  return grad;
+}
+
+// Compute covariance matrix, marginal means, and partition function of the TT distribution.
+// Precomputes ITensors for the three moment integrals int0/int1/int2 per dimension,
+// then evaluates expectations as TT contractions with these integral vectors.
+tuple<Matrix<double>, vector<double>, double> covMat(const MPS& tt, const vector<BasisFunc>& basis) {
+  int d = length(tt);
+  auto s = siteInds(tt);
+  // integral vectors: int0[i][j] = int phi_j(x_i) dx_i, etc.
+  vector<ITensor> basis_int0(d), basis_int1(d), basis_int2(d);
+  for(int i = 1; i <= d; ++i) {
+    basis_int0[i - 1] = basis_int1[i - 1] = basis_int2[i - 1] = ITensor(s(i));
+    for(int j = 1; j <= dim(s(i)); ++j) {
+      basis_int0[i - 1].set(s(i) = j, basis[i - 1].int0(j));
+      basis_int1[i - 1].set(s(i) = j, basis[i - 1].int1(j));
+      basis_int2[i - 1].set(s(i) = j, basis[i - 1].int2(j));
+    }
+  }
+  // Z = int tt(x) dx (partition function); normalize to get probability density rho
+  auto Z = tt(1) * basis_int0[0];
+  for(int i = 2; i <= d; ++i) {
+    Z *= tt(i) * basis_int0[i - 1];
+  }
+  auto rho = tt;
+  rho /= elt(Z);
+  // ei[k]  = E[x_k],  eii[k] = E[x_k^2],  eij[k][l] = E[x_k * x_l] for k < l
+  vector<double> ei(d), eii(d);
+  vector<vector<double>> eij(d, vector<double>(d));
+  for(int k = 1; k <= d; ++k) {
+    // replace dimension k's integral vector with int1 (resp. int2) to get E[x_k] (resp. E[x_k^2])
+    auto eival = rho(1) * (k == 1 ? basis_int1[0] : basis_int0[0]);
+    auto eiival = rho(1) * (k == 1 ? basis_int2[0] : basis_int0[0]);
+    for(int i = 2; i <= d; ++i) {
+      eival *= rho(i) * (k == i ? basis_int1[i - 1] : basis_int0[i - 1]);
+      eiival *= rho(i) * (k == i ? basis_int2[i - 1] : basis_int0[i - 1]);
+    }
+    ei[k - 1] = elt(eival);
+    eii[k - 1] = elt(eiival);
+    for(int l = k + 1; l <= d; ++l) {
+      // replace both dimensions k and l with int1 to get E[x_k * x_l]
+      auto eijval = rho(1) * (k == 1 ? basis_int1[0] : basis_int0[0]);
+      for(int i = 2; i <= d; ++i) {
+        eijval *= rho(i) * (k == i || l == i ? basis_int1[i - 1] : basis_int0[i - 1]);
+      }
+      eij[k - 1][l - 1] = elt(eijval);
+    }
+  }
+  // sigma(k,l) = Cov(x_k, x_l) = E[x_k*x_l] - E[x_k]*E[x_l]
+  Matrix<double> sigma(d, d);
+  for(int k = 1; k <= d; ++k) {
+    for(int l = k; l <= d; ++l) {
+      sigma(k - 1, l - 1) = sigma(l - 1, k - 1) = k == l ? eii[k - 1] - pow(ei[k - 1], 2) : eij[k - 1][l - 1] - ei[k - 1] * ei[l - 1];
+    }
+  }
+  return make_tuple(sigma, ei, elt(Z));
+}
+
+// Compute the 2D marginal density of the normalized TT distribution for dimensions
+// pos1 and pos2 on a (bins x bins) grid. All other dimensions are integrated out
+// analytically using int0, which collapses those TT cores to scalar factors.
+void marginal2d(const MPS& tt, const vector<BasisFunc>& basis, int pos1, int pos2, vector<vector<double>>& grid, bool conv) {
+  int bins = grid.size();
+  int d = length(tt);
+  auto s = siteInds(tt);
+  vector<ITensor> basis_int0(d);
+  for(int i = 1; i <= d; ++i) {
+    basis_int0[i - 1] = ITensor(s(i));
+    for(int j = 1; j <= dim(s(i)); ++j) {
+      basis_int0[i - 1].set(s(i) = j, basis[i - 1].int0(j));
+    }
+  }
+  // normalize to probability density
+  auto Z = tt(1) * basis_int0[0];
+  for(int i = 2; i <= d; ++i) {
+    Z *= tt(i) * basis_int0[i - 1];
+  }
+  auto rho = tt;
+  rho /= elt(Z);
+  for(int i = 0; i < bins; ++i) {
+    for(int j = 0; j < bins; ++j) {
+      double x = basis[pos1 - 1].dom().first + i * (basis[pos1 - 1].dom().second - basis[pos1 - 1].dom().first) / bins;
+      double y = basis[pos2 - 1].dom().first + j * (basis[pos2 - 1].dom().second - basis[pos2 - 1].dom().first) / bins;
+      ITensor xevals(s(pos1)), yevals(s(pos2));
+      for(int k = 1; k <= dim(s(pos1)); ++k) {
+        xevals.set(s(pos1) = k, basis[pos1 - 1](x, k, conv));
+        yevals.set(s(pos2) = k, basis[pos2 - 1](y, k, conv));
+      }
+      auto val = rho(1) * (pos1 == 1 ? xevals : basis_int0[0]);
+      for(int k = 2; k <= d; ++k) {
+        if(pos1 == k) {
+          val *= rho(k) * xevals;
+        } else if(pos2 == k) {
+          val *= rho(k) * yevals;
+        } else {
+          val *= rho(k) * basis_int0[k - 1];
+        }
+      }
+      grid[i][j] = elt(val);
+    }
+  }
+}
+
+}
+}
diff --git a/src/ttsketch/TTHelper.h b/src/ttsketch/TTHelper.h
new file mode 100644
index 0000000000..ecaefec3ac
--- /dev/null
+++ b/src/ttsketch/TTHelper.h
@@ -0,0 +1,39 @@
+#ifndef __PLUMED_ttsketch_TTHelper_h
+#define __PLUMED_ttsketch_TTHelper_h
+#include "BasisFunc.h"
+#include "tools/Matrix.h"
+#include "itensor/all.h"
+
+namespace PLMD {
+namespace ttsketch {
+
+// Write TT to HDF5 file under dataset "tt_{count-1}".
+// count=2 opens the file for writing (creating/truncating); subsequent calls append.
+void ttWrite(const std::string& filename, const itensor::MPS& tt, unsigned count);
+// Read TT from HDF5 file at dataset "tt_{count-1}".
+itensor::MPS ttRead(const std::string& filename, unsigned count);
+// Append the cumulative bias TT (vb) to HDF5 file under dataset "vb_{count-1}".
+void ttSumWrite(const std::string& filename, const itensor::MPS& tt, unsigned count);
+// Read cumulative bias TT from HDF5 file at dataset "vb_{count-1}".
+itensor::MPS ttSumRead(const std::string& filename, unsigned count);
+// Evaluate the TT bias at point `elements` by contracting each core with its
+// 1D basis vector. If conv=true, basis functions are evaluated in convolution mode.
+double ttEval(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, const std::vector<double>& elements, bool conv);
+// Gradient of ttEval w.r.t. `elements`. Computed by replacing the k-th dimension's
+// basis vector with its derivative while keeping all others unchanged (chain rule).
+std::vector<double> ttGrad(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, const std::vector<double>& elements, bool conv);
+// Compute the covariance matrix, mean vector, and partition function of the TT distribution.
+// Normalizes tt by Z = int tt(x) dx, then returns:
+//   sigma(k,l) = E[x_k * x_l] - E[x_k]*E[x_l]  (covariance matrix)
+//   mu[k]      = E[x_k]                           (marginal means)
+//   Z          = int tt(x) dx                     (partition function)
+// All expectations use int0/int1/int2 evaluated analytically via BasisFunc.
+std::tuple<Matrix<double>, std::vector<double>, double> covMat(const itensor::MPS& tt, const std::vector<BasisFunc>& basis);
+// Fill `grid` (bins x bins) with the 2D marginal density of `tt` for dimensions
+// pos1 and pos2, obtained by integrating out all other dimensions analytically.
+void marginal2d(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, int pos1, int pos2, std::vector<std::vector<double>>& grid, bool conv);
+
+}
+}
+
+#endif
diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
new file mode 100644
index 0000000000..b9babd2f6f
--- /dev/null
+++ b/src/ttsketch/TTMetaD.cpp
@@ -0,0 +1,1088 @@
+/* Copyright (c) 2026, Nils E. Strand, Siyao Yang, Yuehaw Khoo, and Aaron R. Dinner
+ *
+ * Tensor train (TT)-metadynamics.
+ * https://arxiv.org/abs/2603.13549
+ * Before compiling, install the ITensor library (https://github.com/ITensor/ITensor) and ensure that HDF5 support is available.
+ * Copy the options.mk to the location of the ITensor installation and edit if needed.
+ * Run the following command from the PLUMED source directory (edit libraries if needed to match your options.mk):
+ * ./configure --enable-modules=ttsketch LDFLAGS=-L/path/to/ITensor/lib CPPFLAGS="-I/path/to/ITensor -DITENSOR_USE_HDF5 -DITENSOR_USE_OMP -D__ITENSOR_LAPACK_WRAP_h" LIBS="-litensor -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lhdf5 -lhdf5_hl" CXXFLAGS="-O3 -fconcepts"
+ */
+
+#include "TTHelper.h"
+#include "bias/Bias.h"
+#include "core/ActionRegister.h"
+#include "core/ActionSet.h"
+#include "core/PlumedMain.h"
+#include "tools/Exception.h"
+#include "tools/Communicator.h"
+#include "tools/File.h"
+#include "tools/OpenMP.h"
+
+using namespace std;
+using namespace itensor;
+using namespace PLMD::bias;
+
+namespace PLMD {
+namespace ttsketch {
+
+class TTMetaD : public Bias {
+
+private:
+  struct Gaussian {
+    bool multivariate;
+    double height;
+    vector<double> center;
+    vector<double> sigma;
+    vector<double> invsigma;
+    Gaussian(const double h, const vector<double>& c, const vector<double>& s)
+      : height(h), center(c), sigma(s), invsigma(s) {
+      for(unsigned i = 0; i < invsigma.size(); ++i) {
+        if(abs(invsigma[i]) > 1.e-20) {
+          invsigma[i] = 1.0 / invsigma[i];
+        } else {
+          invsigma[i] = 0.0;
+        }
+      }
+    }
+  };
+  // standard metadynamics parameters
+  double kbt_;
+  int stride_;
+  bool welltemp_;
+  double biasf_;
+  string fmt_;
+  bool isFirstStep_;
+  double height0_;
+  vector<double> sigma0_;
+  vector<Gaussian> hills_;   // Gaussians deposited since the last TT update
+  OFile hillsOfile_;
+  string mw_dir_;
+  string hillsfname_;
+  bool walkers_mpi_;
+  int mpi_size_;
+  int mpi_rank_;
+  // TT-sketch parameters
+  unsigned d_;               // number of CV dimensions
+  int sketch_rc_;            // initial (sketch) rank for the random TT coefficient tensor
+  int sketch_r_;             // target TT bond dimension after SVD truncation (0 = use cutoff)
+  double sketch_cutoff_;     // relative SVD truncation threshold (used when sketch_r_=0)
+  int sketch_stride_;        // steps between TT bias updates (tau in the paper)
+  double sketch_alpha_;      // weight for non-constant basis functions in the sketch coefficient TT
+  vector<BasisFunc> sketch_basis_;  // one BasisFunc per CV dimension
+  unsigned sketch_count_;    // number of TT updates performed so far (starts at 1, increments after each update)
+  MPS vb_;                   // current TT approximation of the accumulated bias (V_bias^TT)
+  double sketch_until_;      // simulation time after which the bias is frozen (no further updates)
+  bool frozen_;              // true once the bias has been frozen (after sketch_until_)
+  bool sketch_conv_;         // true if Gaussian smoothing (conv mode) is active for basis evaluation
+  bool nonintrusive_;        // if true, uses the intrusive-free variant: accumulates Bemp directly
+                             // instead of re-projecting the previous vb_ through the Gram matrix
+  bool deterministic_;       // if true, use a fixed RNG seed (42) in createTTCoeff() for reproducibility
+  MPS B_prev_;               // accumulated tensor moment from all previous sketches (nonintrusive only)
+  vector<ITensor> A_prev_;   // accumulated cross-gram matrices per bond (nonintrusive only)
+
+  void readGaussians(IFile *ifile);
+  void writeGaussian(const Gaussian& hill, OFile& file);
+  double getHeight(const vector<double>& cv);
+  double getBias(const vector<double>& cv);
+  double getBiasAndDerivatives(const vector<double>& cv, vector<double>& der);
+  double evaluateGaussian(const vector<double>& cv, const Gaussian& hill);
+  double evaluateGaussianAndDerivatives(const vector<double>& cv, const Gaussian& hill, vector<double>& der, vector<double>& dp);
+  bool scanOneHill(IFile* ifile, vector<Value>& tmpvalues, vector<double>& center, vector<double>& sigma, double& height);
+  void paraSketch();
+  MPS createTTCoeff() const;
+  pair<vector<ITensor>, IndexSet> intBasisSample(const IndexSet& is) const;
+  tuple<MPS, vector<ITensor>, vector<ITensor>> formTensorMoment(const vector<ITensor>& M, const MPS& coeff, const IndexSet& is);
+  tuple<MPS, vector<ITensor>, vector<ITensor>> formTensorMomentVb(const MPS& coeff);
+
+public:
+  explicit TTMetaD(const ActionOptions&);
+  void calculate() override;
+  void update() override;
+  static void registerKeywords(Keywords& keys);
+};
+
+PLUMED_REGISTER_ACTION(TTMetaD, "TTMETAD")
+
+void TTMetaD::registerKeywords(Keywords& keys) {
+  Bias::registerKeywords(keys);
+  keys.use("ARG");
+  keys.addFlag("KERNEL_BASIS", false, "Specifies that local kernel basis should be used instead of Fourier basis");
+  keys.add("compulsory", "SIGMA", "the widths of the Gaussian hills");
+  keys.add("compulsory", "PACE", "the frequency for hill addition");
+  keys.add("compulsory", "FILE", "HILLS", "a file in which the list of added hills is stored");
+  keys.add("compulsory", "HEIGHT", "the heights of the Gaussian hills");
+  keys.add("optional", "FMT", "specify format for HILLS files (useful for decrease the number of digits in regtests)");
+  keys.add("optional", "BIASFACTOR", "use well tempered metadynamics and use this bias factor. Please note you must also specify temp");
+  keys.add("optional", "TEMP", "the system temperature - this is only needed if you are doing well-tempered metadynamics");
+  keys.addFlag("WALKERS_MPI", false, "To be used when gromacs + multiple walkers are used");
+  keys.add("optional", "WALKERS_DIR", "shared directory with the hills files from all the walkers");
+  keys.use("RESTART");
+  keys.add("optional", "SKETCH_RANK", "Target rank for TTSketch algorithm - compulsory if SKETCH_CUTOFF is not specified");
+  keys.add("optional", "SKETCH_CUTOFF", "Truncation error cutoff for singular value decomposition - compulsory if SKETCH_RANK is not specified");
+  keys.add("compulsory", "SKETCH_INITRANK", "Initial rank for TTSketch algorithm");
+  keys.add("compulsory", "SKETCH_PACE", "1e6", "The frequency for TT Vbias updates");
+  keys.add("compulsory", "INTERVAL_MIN", "Lower limits, outside the limits the system will not feel the biasing force");
+  keys.add("compulsory", "INTERVAL_MAX", "Upper limits, outside the limits the system will not feel the biasing force");
+  keys.add("compulsory", "SKETCH_NBASIS", "20", "Number of basis functions per dimension");
+  keys.add("compulsory", "SKETCH_ALPHA", "0.05", "Weight coefficient for random tensor train construction");
+  keys.add("optional", "SKETCH_UNTIL", "After this time, the bias potential freezes");
+  keys.add("optional", "SKETCH_WIDTH", "Width of Gaussian kernels for smoothing");
+  keys.add("optional", "KERNEL_DX", "Width of basis function kernels");
+  keys.addFlag("NONINTRUSIVE", false, "Sketching uses previous exact sum of Gaussians instead of TT approximation");
+  keys.addFlag("DETERMINISTIC", false, "Use a fixed random seed for TT sketch construction, ensuring reproducible results. Intended for regression testing.");
+}
+
+TTMetaD::TTMetaD(const ActionOptions& ao):
+  PLUMED_BIAS_INIT(ao),
+  kbt_(0.0),
+  stride_(0),
+  welltemp_(false),
+  biasf_(-1.0),
+  isFirstStep_(true),
+  height0_(numeric_limits<double>::max()),
+  mw_dir_(""),
+  walkers_mpi_(false),
+  mpi_size_(0),
+  mpi_rank_(0),
+  sketch_r_(0),
+  sketch_cutoff_(0.0),
+  sketch_count_(1),
+  sketch_until_(numeric_limits<double>::max()),
+  frozen_(false),
+  sketch_conv_(false),
+  nonintrusive_(false),
+  deterministic_(false)
+{
+  bool kernel;
+  parseFlag("KERNEL_BASIS", kernel);
+  this->d_ = getNumberOfArguments();
+  if(this->d_ < 2) {
+    error("Number of arguments must be at least 2");
+  }
+  parse("FMT", this->fmt_);
+  parseVector("SIGMA", this->sigma0_);
+  if(this->sigma0_.size() != d_) {
+    error("number of arguments does not match number of SIGMA parameters");
+  }
+  parse("HEIGHT", this->height0_);
+  parse("PACE", this->stride_);
+  if(stride_ <= 0) {
+    error("frequency for hill addition is nonsensical");
+  }
+  this->hillsfname_ = "HILLS";
+  parse("FILE", this->hillsfname_);
+  parse("BIASFACTOR", this->biasf_);
+  if(this->biasf_ < 1.0 && this->biasf_ != -1.0) {
+    error("well tempered bias factor is nonsensical");
+  }
+  this->kbt_ = getkBT();
+  if(this->biasf_ >= 1.0) {
+    if(this->kbt_ == 0.0) {
+      error("Unless the MD engine passes the temperature to plumed, with well-tempered metad you must specify it using TEMP");
+    }
+    this->welltemp_ = true;
+  }
+
+  parseFlag("WALKERS_MPI", this->walkers_mpi_);
+  parse("WALKERS_DIR", this->mw_dir_);
+  if(this->walkers_mpi_ && this->mw_dir_== "") {
+    const string ret = filesystem::current_path();
+    this->mw_dir_ = ret + "/";
+    multi_sim_comm.Bcast(this->mw_dir_, 0);
+  }
+  if(this->walkers_mpi_ && this->mw_dir_ != "") {
+    this->hillsfname_ = this->mw_dir_ + "/" + this->hillsfname_;
+  }
+
+  parse("SKETCH_RANK", this->sketch_r_);
+  parse("SKETCH_CUTOFF", this->sketch_cutoff_);
+  if(this->sketch_r_ <= 0 && (this->sketch_cutoff_ <= 0.0 || this->sketch_cutoff_ > 1.0)) {
+    error("Valid SKETCH_RANK or SKETCH_CUTOFF needs to be specified");
+  }
+  parse("SKETCH_INITRANK", this->sketch_rc_);
+  if(this->sketch_rc_ <= 0) {
+    error("SKETCH_INITRANK must be positive");
+  }
+  parse("SKETCH_PACE", this->sketch_stride_);
+  if(this->sketch_stride_ <= 0) {
+    error("SKETCH_PACE must be positive");
+  }
+  vector<double> interval_min;
+  parseVector("INTERVAL_MIN", interval_min);
+  if(interval_min.size() != this->d_) {
+    error("Number of arguments does not match number of INTERVAL_MIN parameters");
+  }
+  vector<double> interval_max;
+  parseVector("INTERVAL_MAX", interval_max);
+  if(interval_max.size() != this->d_) {
+    error("Number of arguments does not match number of INTERVAL_MAX parameters");
+  }
+  int nbasis = 20;
+  parse("SKETCH_NBASIS", nbasis);
+  if(nbasis <= 1) {
+    error("SKETCH_NBASIS must be greater than 1");
+  }
+  if(!kernel && nbasis % 2 == 0) {
+    ++nbasis;
+  }
+  parse("SKETCH_ALPHA", this->sketch_alpha_);
+  if(this->sketch_alpha_ <= 0.0 || this->sketch_alpha_ > 1.0) {
+    error("SKETCH_ALPHA must be positive and no greater than 1");
+  }
+  vector<double> w;
+  parseVector("SKETCH_WIDTH", w);
+  if(w.size() == 0) {
+    w.assign(this->d_, 0.0);
+  }
+  if(w.size() != this->d_) {
+    error("Number of arguments does not match number of SKETCH_WIDTH parameters");
+  }
+  for (double val : w) {
+    if (val != 0.0) {
+      this->sketch_conv_ = true;
+    }
+  }
+  vector<double> dx;
+  parseVector("KERNEL_DX", dx);
+  if(dx.size() == 0) {
+    dx.resize(this->d_, 0.0);
+  }
+  if(dx.size() != this->d_) {
+    error("Number of arguments does not match number of KERNEL_DX parameters");
+  }
+  for(unsigned i = 0; i < this->d_; ++i) {
+    if(this->sketch_conv_ && w[i] <= 0.0) {
+      error("Gaussian smoothing requires positive WIDTH");
+    }
+    if(kernel && dx[i] < 0.0) {
+      error("Kernel basis requires positive KERNEL_DX");
+    }
+    if(interval_max[i] <= interval_min[i]) {
+      error("INTERVAL_MAX parameters need to be greater than respective INTERVAL_MIN parameters");
+    }
+    this->sketch_basis_.push_back(BasisFunc(make_pair(interval_min[i], interval_max[i]), nbasis, w[i], kernel, dx[i]));
+  }
+  if(kernel && this->sketch_conv_) {
+    error("kernel smoothing incompatible with kernel basis");
+  }
+  if(this->walkers_mpi_) {
+    this->mpi_size_ = multi_sim_comm.Get_size();
+    this->mpi_rank_ = multi_sim_comm.Get_rank();
+  }
+
+  parse("SKETCH_UNTIL", this->sketch_until_);
+
+  parseFlag("NONINTRUSIVE", this->nonintrusive_);
+  parseFlag("DETERMINISTIC", this->deterministic_);
+
+  if(getRestart()) {
+    string ttfilename = "ttsketch.h5";
+    if(this->walkers_mpi_) {
+      ttfilename = "../" + ttfilename;
+    }
+    while(true) {
+      try {
+        this->vb_ = ttRead(ttfilename, ++this->sketch_count_);
+      } catch(...) {
+        --this->sketch_count_;
+        break;
+      }
+    }
+    if(this->sketch_count_ == 1) {
+      this->vb_ = MPS();
+    }
+    if(getTime() >= this->sketch_until_) {
+      this->frozen_ = true;
+    } else {
+      IFile hills_ifile;
+      if(!hills_ifile.FileExist(this->hillsfname_)) {
+        error("The hills file cannot be found");
+      }
+      hills_ifile.open(this->hillsfname_);
+      readGaussians(&hills_ifile);
+      hills_ifile.close();
+    }
+  }
+
+  if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+    this->hillsOfile_.link(*this);
+    this->hillsOfile_.enforceSuffix("");
+    this->hillsOfile_.open(this->hillsfname_);
+    if(this->fmt_.length() > 0) {
+      this->hillsOfile_.fmtField(this->fmt_);
+    }
+    hillsOfile_.setHeavyFlush();
+    for(unsigned i = 0; i < this->d_; ++i) {
+      hillsOfile_.setupPrintValue(getPntrToArgument(i));
+    }
+  }
+}
+
+void TTMetaD::readGaussians(IFile *ifile) {
+  vector<double> center(this->d_);
+  vector<double> sigma(this->d_);
+  double height;
+  int nhills = 0;
+
+  vector<Value> tmpvalues;
+  for(unsigned j = 0; j < this->d_; ++j) {
+    tmpvalues.push_back(Value(this, getPntrToArgument(j)->getName(), false));
+  }
+
+  while(scanOneHill(ifile, tmpvalues, center, sigma, height)) {
+    ++nhills;
+    if(this->welltemp_ && this->biasf_ > 1.0) {
+      height *= (this->biasf_ - 1.0) / this->biasf_;
+    }
+    this->hills_.push_back(Gaussian(height, center, sigma));
+  }
+  log << "  restarting from step " << this->sketch_count_ << "\n";
+  log << "  " << nhills << " hills retrieved\n";
+}
+
+bool TTMetaD::scanOneHill(IFile* ifile, vector<Value>& tmpvalues, vector<double>& center, vector<double>& sigma, double& height) {
+  double dummy;
+  if(ifile->scanField("time", dummy)) {
+    unsigned ncv = tmpvalues.size();
+    for(unsigned i = 0; i < ncv; ++i) {
+      ifile->scanField(&tmpvalues[i]);
+      if(tmpvalues[i].isPeriodic() && !getPntrToArgument(i)->isPeriodic()) {
+        error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
+      } else if(tmpvalues[i].isPeriodic()) {
+        string imin, imax;
+        tmpvalues[i].getDomain(imin, imax);
+        string rmin, rmax;
+        getPntrToArgument(i)->getDomain(rmin, rmax);
+        if(imin != rmin || imax != rmax) {
+          error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
+        }
+      }
+      center[i] = tmpvalues[i].get();
+    }
+    for(unsigned i = 0; i < ncv; ++i) {
+      ifile->scanField("sigma_" + getPntrToArgument(i)->getName(), sigma[i]);
+    }
+
+    ifile->scanField("height", height);
+    ifile->scanField("biasf", dummy);
+    ifile->scanField();
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void TTMetaD::writeGaussian(const Gaussian& hill, OFile&file) {
+  file.printField("time", getTimeStep() * getStep());
+  for(unsigned i = 0; i < this->d_; ++i) {
+    file.printField(getPntrToArgument(i), hill.center[i]);
+  }
+  for(unsigned i = 0; i < this->d_; ++i) {
+    file.printField("sigma_" + getPntrToArgument(i)->getName(), hill.sigma[i]);
+  }
+  double height = hill.height;
+  if(this->welltemp_ && this->biasf_ > 1.0) {
+    height *= this->biasf_ / (this->biasf_ - 1.0);
+  }
+  file.printField("height", height).printField("biasf", this->biasf_);
+  file.printField();
+}
+
+// Called every MD step: evaluate total bias (TT + remaining Gaussians) and set forces.
+void TTMetaD::calculate() {
+  vector<double> cv(this->d_);
+  for(unsigned i = 0; i < this->d_; ++i) {
+    cv[i] = getArgument(i);
+  }
+
+  vector<double> der(this->d_, 0.0);
+
+  double ene = getBiasAndDerivatives(cv, der);
+  setBias(ene);
+  for(unsigned i = 0; i < this->d_; ++i) {
+    setOutputForce(i, -der[i]);
+  }
+}
+
+// Called every MD step after forces are applied.
+// Two separate triggers:
+//   1. Every sketch_stride_ steps: run paraSketch() to compress accumulated hills into vb_,
+//      clear the hills list, and rewind the HILLS file for the next interval.
+//   2. Every stride_ steps: deposit a new Gaussian hill and write it to the HILLS file.
+void TTMetaD::update() {
+  bool nowAddATT;
+  if(getStep() % this->sketch_stride_ == 0 && !this->isFirstStep_ && !this->frozen_) {
+    nowAddATT = true;
+    if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+      this->hillsOfile_.flush();
+    }
+  } else {
+    nowAddATT = false;
+  }
+
+  if(nowAddATT) {
+    if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+      unsigned N = this->hills_.size();
+      log << "Sample limits\n";
+      for(unsigned i = 0; i < this->d_; ++i) {
+        auto [large, small] = this->sketch_basis_[i].dom();
+        for(unsigned j = 0; j < N; ++j) {
+          if(this->hills_[j].center[i] > large) {
+            large = this->hills_[j].center[i];
+          }
+          if(this->hills_[j].center[i] < small) {
+            small = this->hills_[j].center[i];
+          }
+        }
+        log << small << " " << large << "\n";
+      }
+
+      log << "\nEmpirical means:\n";
+      Matrix<double> sigmahat(this->d_, this->d_);
+      vector<double> muhat(this->d_, 0.0);
+      for(unsigned k = 0; k < this->d_; ++k) {
+        for(unsigned j = 0; j < N; ++j) {
+          muhat[k] += this->hills_[j].center[k] / N;
+        }
+        log << muhat[k] << " ";
+      }
+      log << "\nEmpirical covariance matrix:\n";
+      for(unsigned k = 0; k < this->d_; ++k) {
+        for(unsigned l = k; l < this->d_; ++l) {
+          sigmahat(k, l) = sigmahat(l, k) = 0.0;
+          for(unsigned j = 0; j < N; ++j) {
+            sigmahat(k, l) += (this->hills_[j].center[k] - muhat[k]) * (this->hills_[j].center[l] - muhat[l]) / (N - 1);
+          }
+          sigmahat(l, k) = sigmahat(k, l);
+        }
+      }
+      matrixOut(log, sigmahat);
+
+      // record current bias at hill centers before update, for computing relative error after
+      vector<double> A0(N);
+      vector<vector<double>> x(N);
+      for(unsigned i = 0; i < N; ++i) {
+        x[i] = this->hills_[i].center;
+        A0[i] = getBias(x[i]);
+      }
+
+      log << "\nStarting TT-sketch...\n";
+      log.flush();
+      paraSketch();
+      ++this->sketch_count_;
+
+      this->hills_.clear();
+
+      // compute relative L2 approximation error at hill centers: ||V_new - V_old|| / ||V_old||
+      vector<double> diff(N);
+      for(unsigned i = 0; i < N; ++i) {
+        diff[i] = getBias(x[i]);
+      }
+      transform(diff.begin(), diff.end(), A0.begin(), diff.begin(), minus<double>());
+      log << "Relative l2 error = " << sqrt(norm(diff) / norm(A0)) << "\n\n";
+      log.flush();
+
+      string ttfilename = "ttsketch.h5";
+      if(this->walkers_mpi_) {
+        ttfilename = "../" + ttfilename;
+      }
+      ttWrite(ttfilename, this->vb_, this->sketch_count_);
+    }
+
+    if(this->walkers_mpi_) {
+      multi_sim_comm.Bcast(this->sketch_count_, 0);
+      if(this->mpi_rank_ != 0) {
+        this->hills_.clear();
+        this->vb_ = ttRead("../ttsketch.h5", this->sketch_count_);
+      }
+    }
+    if(getTime() >= this->sketch_until_) {
+      this->frozen_ = true;
+    } else if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+      this->hillsOfile_.rewind();
+      this->hillsOfile_.clearFields();
+      if(this->fmt_.length() > 0) {
+        this->hillsOfile_.fmtField(this->fmt_);
+      }
+      hillsOfile_.setHeavyFlush();
+      for(unsigned i = 0; i < this->d_; ++i) {
+        hillsOfile_.setupPrintValue(getPntrToArgument(i));
+      }
+    }
+  }
+
+  bool nowAddAHill;
+  if(getStep() % this->stride_ == 0 && !isFirstStep_ && !this->frozen_) {
+    nowAddAHill = true;
+  } else {
+    nowAddAHill = false;
+    this->isFirstStep_ = false;
+  }
+
+  vector<double> cv(this->d_);
+  for(unsigned i = 0; i < this->d_; ++i) {
+    cv[i] = getArgument(i);
+  }
+
+  if(nowAddAHill) {
+    double height = getHeight(cv);
+
+    if(this->walkers_mpi_) {
+      vector<double> all_cv(this->mpi_size_ * this->d_, 0.0);
+      vector<double> all_sigma(this->mpi_size_ * this->sigma0_.size(), 0.0);
+      vector<double> all_height(this->mpi_size_, 0.0);
+      multi_sim_comm.Allgather(cv, all_cv);
+      multi_sim_comm.Allgather(this->sigma0_, all_sigma);
+      multi_sim_comm.Allgather(height * (this->biasf_ > 1.0 ? this->biasf_ / (this->biasf_ - 1.0) : 1.0), all_height);
+
+      for(int i = 0; i < this->mpi_size_; i++) {
+        vector<double> cv_now(this->d_);
+        vector<double> sigma_now(this->sigma0_.size());
+        for(unsigned j = 0; j < this->d_; j++) {
+          cv_now[j] = all_cv[i * this->d_ + j];
+        }
+        for(unsigned j = 0; j < this->sigma0_.size(); j++) {
+          sigma_now[j] = all_sigma[i * this->sigma0_.size() + j];
+        }
+        double fact = (this->biasf_ > 1.0 ? (this->biasf_ - 1.0) / this->biasf_ : 1.0);
+        Gaussian newhill(all_height[i] * fact, cv_now, sigma_now);
+        this->hills_.push_back(newhill);
+        if(this->mpi_rank_ == 0) {
+          writeGaussian(newhill, hillsOfile_);
+        }
+      }
+    } else {
+      Gaussian newhill(height, cv, this->sigma0_);
+      this->hills_.push_back(newhill);
+      writeGaussian(newhill, hillsOfile_);
+    }
+  }
+
+  if(getStep() % this->sketch_stride_ == 1 && !this->frozen_) {
+    log << "Vbias update " << this->sketch_count_ << "...\n\n";
+    log.flush();
+  }
+}
+
+double TTMetaD::getHeight(const vector<double>& cv) {
+  double height = this->height0_;
+  if(this->welltemp_) {
+    double vbias = getBias(cv);
+    if(this->biasf_ > 1.0) {
+      height = this->height0_ * exp(-vbias / (this->kbt_ * (this->biasf_ - 1.0)));
+    } else {
+      height = this->height0_ * exp(-vbias / this->kbt_);
+    }
+  }
+  return height;
+}
+
+// Total bias = TT approximation of accumulated hills + sum of recently deposited Gaussians.
+double TTMetaD::getBias(const vector<double>& cv) {
+  double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
+  unsigned nt = OpenMP::getNumThreads();
+  #pragma omp parallel num_threads(nt)
+  {
+    #pragma omp for reduction(+:bias) nowait
+    for(unsigned i = 0; i < hills_.size(); ++i) {
+      bias += evaluateGaussian(cv, this->hills_[i]);
+    }
+  }
+  return bias;
+}
+
+double TTMetaD::getBiasAndDerivatives(const vector<double>& cv, vector<double>& der) {
+  double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
+  if(length(this->vb_) != 0) {
+    der = ttGrad(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
+  }
+  unsigned nt = OpenMP::getNumThreads();
+  if(this->hills_.size() < 2 * nt || nt == 1) {
+    vector<double> dp(this->d_);
+    for(unsigned i = 0; i < this->hills_.size(); ++i) {
+      bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], der, dp);
+    }
+  } else {
+    #pragma omp parallel num_threads(nt)
+    {
+      vector<double> omp_deriv(this->d_, 0.0);
+      vector<double> dp(this->d_);
+      #pragma omp for reduction(+:bias) nowait
+      for(unsigned i = 0; i < this->hills_.size(); ++i) {
+        bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], omp_deriv, dp);
+      }
+      #pragma omp critical
+      for(unsigned i = 0; i < this->d_; ++i) {
+        der[i] += omp_deriv[i];
+      }
+    }
+  }
+  return bias;
+}
+
+double TTMetaD::evaluateGaussian(const vector<double>& cv, const Gaussian& hill) {
+  double dp2 = 0.0;
+  for(unsigned i = 0; i < this->d_; i++) {
+    double dp = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
+    dp2 += dp * dp;
+  }
+  dp2 *= 0.5;
+
+  double bias = 0.0;
+  if(dp2 < dp2cutoff) {
+    bias = hill.height * exp(-dp2);
+  }
+
+  return bias;
+}
+
+double TTMetaD::evaluateGaussianAndDerivatives(const vector<double>& cv, const Gaussian& hill, vector<double>& der, vector<double>& dp) {
+  double dp2 = 0.0;
+  double bias = 0.0;
+  for(unsigned i = 0; i < this->d_; i++) {
+    dp[i] = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
+    dp2 += dp[i] * dp[i];
+  }
+  dp2 *= 0.5;
+  if(dp2 < dp2cutoff) {
+    bias = hill.height * exp(-dp2);
+    for(unsigned i = 0; i < this->d_; i++) {
+      der[i] -= bias * dp[i] * hill.invsigma[i];
+    }
+  }
+
+  return bias;
+}
+
+// TT-Sketch algorithm: compresses the N accumulated Gaussian hills plus the previous
+// TT bias vb_ into a new low-rank TT approximation G, which becomes the updated vb_.
+//
+// Overview (see paper Algorithm 2):
+//   1. createTTCoeff(): build a random sketch coefficient TT `coeff` of rank sketch_rc_.
+//   2. intBasisSample(): compute M[i][j,k] = <phi_k, g_j>_{dim i}, the inner product of
+//      each basis function with each Gaussian along dimension i.
+//   3. formTensorMoment(): contract coeff with M to get the tensor moment Bemp and
+//      left/right environment tensors envi_L, envi_R needed to form the normal equations.
+//   4. For each bond between cores k and k+1:
+//      - Form the cross-gram matrix A from envi_L and envi_R.
+//      - SVD A to get the rank-trimmed projection V (truncating to sketch_r_ or sketch_cutoff_).
+//   5. Recover TT cores G from Bemp and the pseudo-inverse of U*S.
+//   6. Apply Gram matrix correction (kernel basis only) to convert to dual basis coefficients.
+void TTMetaD::paraSketch() {
+  unsigned N = this->hills_.size();
+  auto coeff = createTTCoeff();
+  auto [M, is] = intBasisSample(siteInds(coeff));
+  MPS G(this->d_);
+
+  auto [Bemp, envi_L, envi_R] = formTensorMoment(M, coeff, is);
+  MPS Bemp_Vb;
+  vector<ITensor> envi_L_Vb;
+  vector<ITensor> envi_R_Vb;
+  if(this->sketch_count_ != 1) {
+    if(this->nonintrusive_) {
+      // Non-intrusive variant: accumulate tensor moments directly across sketches.
+      // B_prev_ stores the sum of Bemp from all prior intervals; re-index its bond/site
+      // indices to match the current coeff TT's indices before adding.
+      auto sites = siteInds(Bemp);
+      auto sites_prev = siteInds(this->B_prev_);
+      auto links = linkInds(Bemp);
+      auto links_prev = linkInds(this->B_prev_);
+      for(unsigned i = 1; i <= this->d_; ++i) {
+        this->B_prev_.ref(i) *= delta(sites(i), sites_prev(i));
+        if(i != 1) {
+          this->B_prev_.ref(i) *= delta(links(i - 1), links_prev(i - 1));
+        }
+        if (i != this->d_) {
+          this->B_prev_.ref(i) *= delta(links(i), links_prev(i));
+        }
+        Bemp.ref(i) += this->B_prev_(i);
+      }
+    } else {
+      // Intrusive variant: project the previous TT bias vb_ onto the current sketch's
+      // basis by pre-multiplying each core by the Gram matrix (kernel basis) or the
+      // convolution inner-product matrix (Fourier conv mode), then add its tensor moment.
+      if(this->sketch_basis_[0].kernel()) {
+        // Convert vb_ from dual basis to standard basis by applying Gram matrix per core
+        for(unsigned i = 1; i <= this->d_; ++i) {
+          auto s = siteIndex(this->vb_, i);
+          ITensor gram(s, prime(s));
+          for(int j = 1; j <= dim(s); ++j) {
+            for(int l = 1; l <= dim(s); ++l) {
+              gram.set(s = j, prime(s) = l, this->sketch_basis_[i - 1].gram()(j - 1, l - 1));
+            }
+          }
+          this->vb_.ref(i) *= gram;
+          this->vb_.ref(i).noPrime();
+        }
+      }
+      if(this->sketch_conv_) {
+        // Apply Fourier convolution damping factors to vb_ to account for basis smoothing
+        for(unsigned i = 1; i <= this->d_; ++i) {
+          double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
+          double w = this->sketch_basis_[i - 1].w();
+          auto s = siteIndex(this->vb_, i);
+          ITensor inner(s, prime(s));
+          for(int j = 1; j < dim(s); ++j) {
+            inner.set(s = j, prime(s) = j, exp(-pow(M_PI * w * (j / 2), 2) / (2 * pow(L, 2))));
+          }
+          this->vb_.ref(i) *= inner;
+          this->vb_.ref(i).noPrime();
+        }
+      }
+      auto vbresult = formTensorMomentVb(coeff);
+      Bemp_Vb = get<0>(vbresult);
+      envi_L_Vb = get<1>(vbresult);
+      envi_R_Vb = get<2>(vbresult);
+      for(unsigned i = 1; i <= this->d_; ++i) {
+        Bemp.ref(i) += Bemp_Vb(i);
+      }
+    }
+  }
+  if(this->nonintrusive_) {
+    this->B_prev_ = Bemp;
+    if(this->sketch_count_ == 1) {
+      this->A_prev_.resize(this->d_ - 1);
+    }
+  }
+  // For each bond between cores k-1 and k, form the cross-gram matrix A = L^T * R,
+  // where L = envi_L[k] and R = envi_R[k-2] are the left/right environment projections
+  // of coeff onto the sample indices. SVD A to find the rank-trimmed subspace V.
+  auto links = linkInds(coeff);
+  vector<ITensor> U(this->d_), S(this->d_), V(this->d_);
+  vector<Index> links_trimmed;
+  for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
+    int rank = dim(links(core_id - 1));
+    Matrix<double> LMat(N, rank), RMat(N, rank);
+    for(unsigned i = 1; i <= N; ++i) {
+      for(int j = 1; j <= rank; ++j) {
+        LMat(i - 1, j - 1) = envi_L[core_id - 1].elt(is(core_id) = i, links(core_id - 1) = j);
+        RMat(i - 1, j - 1) = envi_R[core_id - 2].elt(is(core_id - 1) = i, links(core_id - 1) = j);
+      }
+    }
+    Matrix<double> Lt, AMat, PMat, AMat_Vb;
+    transpose(LMat, Lt);
+    mult(Lt, RMat, AMat);
+
+    if(this->sketch_count_ != 1 && !this->nonintrusive_) {
+      auto ivb = linkIndex(this->vb_, core_id - 1);
+      int rank_vb = dim(ivb);
+      LMat = Matrix<double>(rank_vb, rank);
+      RMat = Matrix<double>(rank_vb, rank);
+      for(int i = 1; i <= rank_vb; ++i) {
+        for(int j = 1; j <= rank; ++j) {
+          LMat(i - 1, j - 1) = envi_L_Vb[core_id - 1].elt(ivb = i, links(core_id - 1) = j);
+          RMat(i - 1, j - 1) = envi_R_Vb[core_id - 2].elt(ivb = i, links(core_id - 1) = j);
+        }
+      }
+      transpose(LMat, Lt);
+      mult(Lt, RMat, AMat_Vb);
+    }
+
+    ITensor A(prime(links(core_id - 1)), links(core_id - 1));
+    for(int i = 1; i <= rank; ++i) {
+      for(int j = 1; j <= rank; ++j) {
+        A.set(prime(links(core_id - 1)) = i, links(core_id - 1) = j, AMat(i - 1, j - 1));
+      }
+    }
+    if(this->sketch_count_ != 1) {
+      if(this->nonintrusive_) {
+        this->A_prev_[core_id - 2] *= delta(this->A_prev_[core_id - 2].index(1), A.index(1));
+        this->A_prev_[core_id - 2] *= delta(this->A_prev_[core_id - 2].index(2), A.index(2));
+        A += this->A_prev_[core_id - 2];
+      } else {
+        ITensor A_Vb(prime(links(core_id - 1)), links(core_id - 1));
+        for(int i = 1; i <= rank; ++i) {
+          for(int j = 1; j <= rank; ++j) {
+            A_Vb.set(prime(links(core_id - 1)) = i, links(core_id - 1) = j, AMat_Vb(i - 1, j - 1));
+          }
+        }
+        A += A_Vb;
+      }
+    }
+    if(this->nonintrusive_) {
+      this->A_prev_[core_id - 2] = A;
+    }
+    auto original_link_tags = tags(links(core_id - 1));
+    V[core_id - 1] = ITensor(links(core_id - 1));
+    if(this->sketch_r_ > 0) {
+      svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1],
+          {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags, "MaxDim=", this->sketch_r_});
+    } else {
+      svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1], {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags});
+    }
+    links_trimmed.push_back(commonIndex(S[core_id - 1], V[core_id - 1]));
+  }
+
+  // Recover TT cores G from Bemp and the trimmed subspaces.
+  // G[1] = Bemp[1] * V[1] (project first core onto rank-trimmed subspace)
+  // G[k] = pinv(U[k]*S[k]) * Bemp[k] * V[k]  for 2 <= k < d
+  // G[d] = pinv(U[d]*S[d]) * Bemp[d]
+  G.ref(1) = Bemp(1) * V[1];
+  for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
+    int rank = dim(links(core_id - 1)), rank_trimmed = dim(links_trimmed[core_id - 2]);
+    ITensor A = U[core_id - 1] * S[core_id - 1];
+    ITensor Pinv(links_trimmed[core_id - 2], links(core_id - 1));
+    Matrix<double> AMat(rank, rank_trimmed), PMat;
+    for(int i = 1; i <= rank; ++i) {
+      for(int j = 1; j <= rank_trimmed; ++j) {
+        AMat(i - 1, j - 1) = A.elt(prime(links(core_id - 1)) = i, links_trimmed[core_id - 2] = j);
+      }
+    }
+    pseudoInvert(AMat, PMat);
+
+    for(int i = 1; i <= rank_trimmed; ++i) {
+      for(int j = 1; j <= rank; ++j) {
+        Pinv.set(links_trimmed[core_id - 2] = i, links(core_id - 1) = j, PMat(i - 1, j - 1));
+      }
+    }
+    G.ref(core_id) = Pinv * Bemp(core_id);
+    if(core_id != this->d_) {
+      G.ref(core_id) *= V[core_id];
+    }
+  }
+
+  log << "Final ranks ";
+  for(unsigned i = 1; i < this->d_; ++i) {
+    log << dim(linkIndex(G, i)) << " ";
+  }
+  log << "\n";
+  log.flush();
+
+  if(this->sketch_basis_[0].kernel()) {
+    // Apply pseudo-inverse of the Gram matrix to each core to convert from the
+    // primal Gram representation back to dual basis coefficients (G^+ * G_core).
+    for(unsigned i = 1; i <= this->d_; ++i) {
+      auto s = siteIndex(G, i);
+      ITensor ginv(s, prime(s));
+      for(int j = 1; j <= dim(s); ++j) {
+        for(int l = 1; l <= dim(s); ++l) {
+          ginv.set(s = j, prime(s) = l, this->sketch_basis_[i - 1].ginv()(j - 1, l - 1));
+        }
+      }
+      G.ref(i) *= ginv;
+      G.ref(i).noPrime();
+    }
+  }
+
+  this->vb_ = G;
+}
+
+// Build the random sketch coefficient TT `coeff` of bond dimension sketch_rc_.
+// Each element is drawn from N(0,1); then each core is multiplied by a diagonal
+// matrix diag(1, alpha, alpha, ...) so the constant basis function (pos=1) has
+// full weight while all other harmonics are scaled down by sketch_alpha_. This
+// ensures the sketch captures the large constant component of the bias accurately.
+MPS TTMetaD::createTTCoeff() const {
+  default_random_engine generator(this->deterministic_ ? 42u : static_cast<unsigned int>(time(nullptr)));
+  normal_distribution<double> distribution(0.0, 1.0);
+  int n = this->sketch_basis_[0].nbasis();
+  auto sites = SiteSet(this->d_, n);
+  auto coeff = MPS(sites, this->sketch_rc_);
+  for(int j = 1; j <= n; ++j) {
+    for(int k = 1; k <= this->sketch_rc_; ++k) {
+      coeff.ref(1).set(sites(1) = j, linkIndex(coeff, 1) = k, distribution(generator));
+    }
+  }
+  for(unsigned i = 2; i <= this->d_ - 1; ++i) {
+    for(int j = 1; j <= n; ++j) {
+      for(int k = 1; k <= this->sketch_rc_; ++k) {
+        for(int l = 1; l <= this->sketch_rc_; ++l) {
+          coeff.ref(i).set(sites(i) = j, linkIndex(coeff, i - 1) = k, linkIndex(coeff, i) = l, distribution(generator));
+        }
+      }
+    }
+  }
+  for(int j = 1; j <= n; ++j) {
+    for(int k = 1; k <= this->sketch_rc_; ++k) {
+      coeff.ref(this->d_).set(sites(this->d_) = j, linkIndex(coeff, this->d_ - 1) = k, distribution(generator));
+    }
+  }
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    auto s = sites(i);
+    auto sp = prime(s);
+    vector<double> Avec(n, this->sketch_alpha_);
+    Avec[0] = 1.0;
+    auto A = diagITensor(Avec, s, sp);
+    coeff.ref(i) *= A;
+    coeff.ref(i).noPrime();
+  }
+  return coeff;
+}
+
+// Compute the matrix M[dim i][sample j, basis k] = <phi_k, g_j>_{dim i}, the
+// analytical inner product of each 1D basis function with the 1D marginal of each
+// Gaussian hill along dimension i. The d-dimensional Gaussian factorizes as a
+// product of d 1D Gaussians, so we distribute height as h^(1/d) per dimension.
+// These integrals are computed in closed form:
+//   - Kernel basis: convolution of the kernel function with the 1D Gaussian hill
+//   - Fourier basis: Fourier transform of the 1D Gaussian (exponential decay * harmonic)
+// Returns M as a vector of ITensors (one per dimension) and a new IndexSet is_new
+// where is_new(i) indexes the N sample points for dimension i.
+pair<vector<ITensor>, IndexSet> TTMetaD::intBasisSample(const IndexSet& is) const {
+  unsigned N = this->hills_.size();
+  int nb = this->sketch_basis_[0].nbasis();
+  auto sites_new = SiteSet(this->d_, N);
+  vector<ITensor> M;
+  vector<Index> is_new;
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
+    double a = (this->sketch_basis_[i - 1].dom().second + this->sketch_basis_[i - 1].dom().first) / 2;
+    M.push_back(ITensor(sites_new(i), is(i)));
+    is_new.push_back(sites_new(i));
+    for(unsigned j = 1; j <= N; ++j) {
+      double x = this->hills_[j - 1].center[i - 1];
+      double w = this->hills_[j - 1].sigma[i - 1];
+      double h = pow(this->hills_[j - 1].height, 1.0 / this->d_);  // per-dimension d-th root of height
+      for(int pos = 1; pos <= nb; ++pos) {
+        double result = 0.0;
+        if(this->sketch_basis_[0].kernel()) {
+          if(pos == 1) {
+            // <1, g_j> = integral of 1D Gaussian = sqrt(2*pi)*sigma
+            result = h * sqrt(2 * M_PI) * w;
+          } else {
+            // <kernel_c, g_j> = conv of two Gaussians (widths dx and w), with periodic images
+            double c = this->sketch_basis_[i - 1].center(pos - 1);
+            double dx = this->sketch_basis_[i - 1].dx();
+            for(int k = -1; k <= 1; ++k) {
+              result += exp(-pow(x - c + 2 * k * L, 2) / (2 * (pow(dx, 2) + pow(w, 2)))) * h * sqrt(2 * M_PI) * w /
+                        (sqrt(1 / pow(dx, 2) + 1 / pow(w, 2)) * w);
+            }
+          }
+        } else {
+          if(pos == 1) {
+            // <1/sqrt(2L), g_j> = sqrt(pi/L) * sigma (Gaussian times constant)
+            result = h * sqrt(M_PI / L) * w;
+          } else if(pos % 2 == 0) {
+            // <cos harmonic, g_j>: Fourier transform of Gaussian times exponential damping
+            result = exp(-pow(M_PI * w * (pos / 2), 2) / (2 * pow(L, 2))) * h * sqrt(2 * M_PI / L) * w * cos(M_PI * (x - a) * (pos / 2) / L);
+          } else {
+            result = exp(-pow(M_PI * w * (pos / 2), 2) / (2 * pow(L, 2))) * h * sqrt(2 * M_PI / L) * w * sin(M_PI * (x - a) * (pos / 2) / L);
+          }
+        }
+        M.back().set(sites_new(i) = j, is(i) = pos, result);
+      }
+    }
+  }
+  return make_pair(M, IndexSet(is_new));
+}
+
+// Compute the tensor moment B and environment tensors for the new Gaussian hills.
+//
+// First, L = coeff with each core's basis index replaced by the sample index via M:
+//   L[i] = coeff[i] * M[i], so L has (sample_index, bond_left, bond_right) indices.
+//
+// envi_L[i] = partial contraction of L(1)...L(i) summed over sample indices 1..N,
+//   yielding shape (sample_index_{i+1}, link_i). Used to form the left half of A.
+// envi_R[i] = partial contraction of L(i+2)...L(d) summed over sample indices,
+//   yielding shape (sample_index_{i+1}, link_{i+1}). Used to form the right half of A.
+//
+// B[k] = sum_j envi_L[k-1][j, :] * envi_R[k-1][j, :] * M[k][:, basis_k],
+//   which is the "tensor moment" for core k: the sketch of the Gaussian sum projected
+//   onto the basis functions of dimension k and the sketch subspaces of all other dims.
+tuple<MPS, vector<ITensor>, vector<ITensor>> TTMetaD::formTensorMoment(const vector<ITensor>& M, const MPS& coeff, const IndexSet& is) {
+  int N = dim(is(1));
+  auto links = linkInds(coeff);
+  // L[i] = coeff[i] with basis index contracted against M[i] -> sample index
+  auto L = coeff;
+
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    L.ref(i) *= M[i - 1];
+  }
+
+  // Build left environments: envi_L[i] accumulates the contraction of L(1)..L(i)
+  // over the shared sample index, leaving the next sample index and link free.
+  vector<ITensor> envi_L(this->d_);
+  envi_L[1] = L(1) * delta(is(1), is(2));
+  for(unsigned i = 2; i < this->d_; ++i) {
+    int rankl = dim(links(i - 1));
+    int rankr = dim(links(i));
+    envi_L[i] = ITensor(is(i + 1), links(i));
+    for(int j = 1; j <= N; ++j) {
+      for(int k = 1; k <= rankr; ++k) {
+        ITensor LHS(links(i - 1)), RHS(links(i - 1));
+        for(int ii = 1; ii <= rankl; ++ii) {
+          LHS.set(links(i - 1) = ii, envi_L[i - 1].elt(is(i) = j, links(i - 1) = ii));
+          RHS.set(links(i - 1) = ii, L(i).elt(links(i - 1) = ii, is(i) = j, links(i) = k));
+        }
+        envi_L[i].set(is(i + 1) = j, links(i) = k, elt(LHS * RHS));
+      }
+    }
+  }
+
+  // Build right environments: envi_R[i] accumulates L(i+2)..L(d) right-to-left.
+  vector<ITensor> envi_R(this->d_);
+  envi_R[this->d_ - 2] = L(this->d_) * delta(is(this->d_), is(this->d_ - 1));
+  for(int i = this->d_ - 3; i >= 0; --i) {
+    int rankl = dim(links(i + 1));
+    int rankr = dim(links(i + 2));
+    envi_R[i] = ITensor(is(i + 1), links(i + 1));
+    for(int j = 1; j <= N; ++j) {
+      for(int k = 1; k <= rankl; ++k) {
+        ITensor LHS(links(i + 2)), RHS(links(i + 2));
+        for(int ii = 1; ii <= rankr; ++ii) {
+          LHS.set(links(i + 2) = ii, envi_R[i + 1].elt(is(i + 2) = j, links(i + 2) = ii));
+          RHS.set(links(i + 2) = ii, L(i + 2).elt(links(i + 2) = ii, is(i + 2) = j, links(i + 1) = k));
+        }
+        envi_R[i].set(is(i + 1) = j, links(i + 1) = k, elt(LHS * RHS));
+      }
+    }
+  }
+
+  // Assemble tensor moment B: for each core, combine the left and right environments
+  // element-wise over sample indices, then contract with M to restore basis indices.
+  MPS B(this->d_);
+  B.ref(1) = envi_R[0] * M[0];
+  for(unsigned core_id = 2; core_id < this->d_; ++core_id) {
+    int rankl = dim(links(core_id - 1));
+    int rankr = dim(links(core_id));
+    B.ref(core_id) = ITensor(links(core_id - 1), is(core_id), links(core_id));
+    for(int i = 1; i <= rankl; ++i) {
+      for(int j = 1; j <= rankr; ++j) {
+        for(int k = 1; k <= N; ++k) {
+          double Lelt = envi_L[core_id - 1].elt(is(core_id) = k, links(core_id - 1) = i);
+          double Relt = envi_R[core_id - 1].elt(is(core_id) = k, links(core_id) = j);
+          B.ref(core_id).set(links(core_id - 1) = i, is(core_id) = k, links(core_id) = j, Lelt * Relt);
+        }
+      }
+    }
+    B.ref(core_id) *= M[core_id - 1];
+  }
+  B.ref(this->d_) = envi_L[this->d_ - 1] * M[this->d_ - 1];
+
+  return make_tuple(B, envi_L, envi_R);
+}
+
+// Compute the tensor moment B and environments for the previous TT bias vb_.
+// This is the intrusive variant's contribution from the prior accumulated bias.
+// Analogous to formTensorMoment but uses vb_ in place of M*coeff: the "samples"
+// are the vb_ bond indices, and the environments are formed by contracting coeff
+// with vb_ core by core. The result has the same structure as Bemp from the new hills.
+tuple<MPS, vector<ITensor>, vector<ITensor>> TTMetaD::formTensorMomentVb(const MPS& coeff) {
+  // align vb_ site indices with coeff's site indices for contraction
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    this->vb_.ref(i) *= delta(siteIndex(this->vb_, i), siteIndex(coeff, i));
+  }
+  vector<ITensor> envi_L(this->d_);
+  envi_L[1] = coeff(1) * this->vb_(1);
+  for(unsigned i = 2; i < this->d_; ++i) {
+    envi_L[i] = envi_L[i - 1] * coeff(i) * this->vb_(i);
+  }
+
+  vector<ITensor> envi_R(this->d_);
+  envi_R[this->d_ - 2] = coeff(this->d_) * this->vb_(this->d_);
+  for(int i = this->d_ - 3; i >= 0; --i) {
+    envi_R[i] = envi_R[i + 1] * coeff(i + 2) * this->vb_(i + 2);
+  }
+
+  MPS B(this->d_);
+  B.ref(1) = this->vb_(1) * envi_R[0];
+  for(unsigned core_id = 2; core_id < this->d_; ++core_id) {
+    B.ref(core_id) = envi_L[core_id - 1] * this->vb_(core_id) * envi_R[core_id - 1];
+  }
+  B.ref(this->d_) = envi_L[this->d_ - 1] * this->vb_(this->d_);
+
+  return make_tuple(B, envi_L, envi_R);
+}
+
+}
+}
diff --git a/src/ttsketch/module.type b/src/ttsketch/module.type
new file mode 100644
index 0000000000..de83273033
--- /dev/null
+++ b/src/ttsketch/module.type
@@ -0,0 +1 @@
+default-off
diff --git a/src/ttsketch/options.mk b/src/ttsketch/options.mk
new file mode 100644
index 0000000000..8a72914382
--- /dev/null
+++ b/src/ttsketch/options.mk
@@ -0,0 +1,221 @@
+### User Configurable Options
+
+## To set up, follow the steps [1], [2], [3] below
+
+#########
+## [1]
+##
+## Set which compiler to use by defining CCCOM:
+## GNU GCC compiler
+CCCOM=g++ -m64 -std=c++17 -fconcepts -fPIC
+
+## Clang compiler (good to use on macOS)
+## Note: try this if you have trouble compiling 
+##       with g++ on macOS Mojave 10.14
+#CCCOM=clang++ -std=c++17 -fPIC -Wno-gcc-compat
+
+## GCC On Windows cygwin
+## Extra flags are workaround for limitations
+## of this compiler with Windows binary format
+#CCCOM=g++ -std=c++17 -Wa,-mbig-obj -O2 -fPIC
+
+## Intel C++ Compiler not recommended,
+## As of June 2016 it does not fully support C++17
+
+#########
+
+#########
+## [2]
+## 
+## BLAS/LAPACK Related Options
+## 
+## o The variable PLATFORM can be set to macos,lapack,mkl, or acml.
+##   This tells ITensor which style of BLAS/Lapack library to expect,
+##   and turns various lines of code on or off inside the files
+##   itensor/tensor/lapack_wrap.h and lapack_wrap.cc.
+## 
+## o BLAS_LAPACK_LIBFLAGS specifies blas or lapack related 
+##   flags used during the linking step. For example, 
+##   flags of the type:
+##   -L/path/to/lapack/lib -llapack -lblas
+##   though the names of the static libraries (the files referred
+##   to by the -l flags) can be highly variable - see examples below.
+##
+## o BLAS_LAPACK_INCLUDEFLAGS are blas or lapack related flags 
+##   needed during compilation. It may include flags such as 
+##   -I/path/to/lapack/include
+##   where "include" is a folder containing .h header files.
+##
+
+##
+## Mac OSX system
+##
+
+#PLATFORM=macos
+#BLAS_LAPACK_LIBFLAGS=-framework Accelerate
+
+##
+## Example using a C interface to LAPACK on GNU/LINUX systems
+## (Path to lib/ folder may differ on your system)
+##
+
+#PLATFORM=lapack
+#BLAS_LAPACK_LIBFLAGS=-lpthread -L/usr/lib -lblas -llapack 
+
+##
+## Example using the Intel MKL library
+## (Path to lib/intel64/ and include/ folders may differ on your system)
+##
+
+PLATFORM=mkl
+## Example of using a sequential version of MKL. You may want to link to 
+## the sequential version of MKL if you are using ITensor's native multithreading, 
+## see section [4] for more details.
+BLAS_LAPACK_LIBFLAGS=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core
+## Use the following line if the library is not in the LD_LIBRARY_PATH
+#BLAS_LAPACK_INCLUDEFLAGS=-I/opt/intel/mkl/include
+
+##
+## Example using the OpenBLAS library (http://www.openblas.net/)
+## (Path to lib/ and include/ folders may differ on your system)
+##
+
+#PLATFORM=openblas
+#BLAS_LAPACK_LIBFLAGS=-lpthread -L/usr/local/opt/openblas/lib -lopenblas
+#BLAS_LAPACK_INCLUDEFLAGS=-I/usr/local/opt/openblas/include -fpermissive -DHAVE_LAPACK_CONFIG_H -DLAPACK_COMPLEX_STRUCTURE
+
+##
+## Example using the AMD ACML library
+## (Path to lib/ folder may differ on your system)
+##
+
+#PLATFORM=acml
+#BLAS_LAPACK_LIBFLAGS=-L/opt/acml5.1.0/gfortran64/lib -lacml -lgfortran -lpthread
+
+
+#########
+## [3]
+##
+## If you want ITensor to be compiled with HDF5 support
+## uncomment the line below, and edit it to be the
+## location in which the HDF5 libraries and headers are
+## installed. 
+##
+## To determine the prefix on your system, use the command:
+## h5cc -show
+##
+## For more help, visit: 
+## http://itensor.org/docs.cgi?vers=cppv3&page=install
+##
+HDF5_PREFIX=/software/hdf5-1.12.0-el8-x86_64
+
+
+#########
+## [4]
+##
+## ITensor provides some native OpenMP multithreading.
+## Currently, block sparse tensor contractions support
+## optional multithreading, so any ITensor code using QN
+## conservation may benefit.
+##
+## If you want to enable OpenMP multithreading in ITensor,
+## uncomment the line below. You must have OpenMP installed 
+## on your system.
+##
+## You should set the environment variable OMP_NUM_THREADS
+## before running your code to set the number of threads,
+## for example using the command:
+## export OMP_NUM_THREADS=8
+##
+## We also recommend setting the number of BLAS/LAPACK threads
+## to 1, since BLAS/LAPACK multithreading may compete with ITensor's
+## native multithreading.
+##
+## If you are compiling ITensor with Intel MKL you can set the
+## environment variable:
+##
+## export MKL_NUM_THREADS=1
+##
+## or directly link to MKL's sequential library (see section [2] above).
+##
+## If you are compiling ITensor with OpenBLAS, you can set the
+## environment variable:
+##
+## export OPENBLAS_NUM_THREADS=1
+##
+## For more help, visit: 
+## http://itensor.org/docs.cgi?vers=cppv3&page=install
+##
+ITENSOR_USE_OMP=1
+
+
+#########
+## [5]
+##
+## This step is optional, but if you wish to customize the flags
+## used to compile optimized and debug code, you can do so here.
+
+## Flags to give the compiler for "release mode"
+OPTIMIZATIONS=-O2 -DNDEBUG -Wall -Wno-unknown-pragmas
+
+## Flags to give the compiler for "debug mode"
+DEBUGFLAGS=-DDEBUG -g -Wall -Wno-unknown-pragmas -pedantic
+#
+## Set this to 1 if you want ITensor to also build dynamic libraries
+## These can be faster to link and give smaller binary sizes
+## You may need to set you LD_LIBRARY_PATH to include the ITensor lib/
+## folder in order to link with the dynamic libraries
+ITENSOR_MAKE_DYLIB=0
+
+
+###
+### Other Makefile variables defined for convenience.
+### Not necessary to modify these for most cases.
+###
+
+PREFIX=$(THIS_DIR)
+
+ITENSOR_LIBDIR=$(PREFIX)/lib
+ITENSOR_INCLUDEDIR=$(PREFIX)
+
+ITENSOR_LIBNAMES=itensor
+ITENSOR_LIBFLAGS=$(patsubst %,-l%, $(ITENSOR_LIBNAMES))
+ITENSOR_LIBFLAGS+= $(BLAS_LAPACK_LIBFLAGS)
+ITENSOR_LIBGFLAGS=$(patsubst %,-l%-g, $(ITENSOR_LIBNAMES))
+ITENSOR_LIBGFLAGS+= $(BLAS_LAPACK_LIBFLAGS)
+ITENSOR_LIBS=$(patsubst %,$(ITENSOR_LIBDIR)/lib%.a, $(ITENSOR_LIBNAMES))
+ITENSOR_GLIBS=$(patsubst %,$(ITENSOR_LIBDIR)/lib%-g.a, $(ITENSOR_LIBNAMES))
+
+ITENSOR_INCLUDEFLAGS=-I'$(ITENSOR_INCLUDEDIR)' $(BLAS_LAPACK_INCLUDEFLAGS)
+
+ifdef HDF5_PREFIX
+ITENSOR_USE_HDF5 = 1
+ITENSOR_INCLUDEFLAGS += -I$(HDF5_PREFIX)/include -DITENSOR_USE_HDF5
+ITENSOR_LIBFLAGS += -L$(HDF5_PREFIX)/lib -lhdf5 -lhdf5_hl
+ITENSOR_LIBGFLAGS += -L$(HDF5_PREFIX)/lib -lhdf5 -lhdf5_hl
+endif
+
+ifndef CCCOM
+$(error Makefile variable CCCOM not defined in options.mk; please define it.)
+endif
+
+ifdef ITENSOR_USE_OMP
+ITENSOR_INCLUDEFLAGS += -DITENSOR_USE_OMP -fopenmp
+ITENSOR_LIBFLAGS += -fopenmp
+ITENSOR_LIBGFLAGS += -fopenmp
+endif
+
+CCFLAGS=-I. $(ITENSOR_INCLUDEFLAGS) $(OPTIMIZATIONS) -Wno-unused-variable
+CCGFLAGS=-I. $(ITENSOR_INCLUDEFLAGS) $(DEBUGFLAGS)
+LIBFLAGS=-L'$(ITENSOR_LIBDIR)' $(ITENSOR_LIBFLAGS)
+LIBGFLAGS=-L'$(ITENSOR_LIBDIR)' $(ITENSOR_LIBGFLAGS)
+
+## Determine shared library extension
+UNAME_S := $(shell uname -s)
+ifeq ($(UNAME_S),Darwin)
+  DYLIB_EXT ?= dylib
+  DYLIB_FLAGS ?= -dynamiclib
+else
+  DYLIB_EXT ?= so
+  DYLIB_FLAGS ?= -shared
+endif

From d9cfb65f7a2ede35659aa1f029596f1b1ed7bb9b Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Tue, 31 Mar 2026 22:35:45 -0500
Subject: [PATCH 02/26] replace all division by multiplication and precompute
 some factors

---
 src/ttsketch/BasisFunc.cpp | 77 ++++++++++++++++++++++++--------------
 src/ttsketch/BasisFunc.h   |  3 ++
 2 files changed, 52 insertions(+), 28 deletions(-)

diff --git a/src/ttsketch/BasisFunc.cpp b/src/ttsketch/BasisFunc.cpp
index aeca9b20dc..36daf6ded4 100644
--- a/src/ttsketch/BasisFunc.cpp
+++ b/src/ttsketch/BasisFunc.cpp
@@ -6,10 +6,13 @@ namespace PLMD {
 namespace ttsketch {
 
 BasisFunc::BasisFunc()
-  : dom_(make_pair(0.0, 0.0)), nbasis_(0), L_(0.0), shift_(0.0), w_(0.0), kernel_(false), dx_(0.0) {}
+  : dom_(make_pair(0.0, 0.0)), nbasis_(0), L_(0.0), shift_(0.0), inv_L_(0.0), sqrt_inv_L_(0.0), inv_sqrt_2L_(0.0), w_(0.0), kernel_(false), dx_(0.0) {}
 
 BasisFunc::BasisFunc(pair<double, double> dom, int nbasis, double w, bool kernel, double dx)
-  : dom_(dom), nbasis_(nbasis), L_((dom.second - dom.first) / 2), shift_((dom.second + dom.first) / 2), w_(w), kernel_(kernel), dx_(dx)
+  : dom_(dom), nbasis_(nbasis), L_((dom.second - dom.first) * 0.5), shift_((dom.second + dom.first) * 0.5),
+    inv_L_(2.0 / (dom.second - dom.first)), sqrt_inv_L_(sqrt(2.0 / (dom.second - dom.first))),
+    inv_sqrt_2L_(sqrt(1.0 / (dom.second - dom.first))),
+    w_(w), kernel_(kernel), dx_(dx)
 {
   if(kernel) {
     double spacing = (dom.second - dom.first) / (nbasis - 1);
@@ -32,7 +35,7 @@ BasisFunc::BasisFunc(pair<double, double> dom, int nbasis, double w, bool kernel
     this->gram_(0, 0) = this->dom_.second - this->dom_.first;
     for(int i = 1; i < nbasis; ++i) {
       // G(0,i) = G(i,0) = int phi_{i+1}(x) dx (integral of a periodized Gaussian)
-      this->gram_(i, 0) = this->gram_(0, i) = this->dx_ * sqrt(M_PI / 2) *
+      this->gram_(i, 0) = this->gram_(0, i) = this->dx_ * sqrt(M_PI * 0.5) *
                                 (erf((this->dom_.second -
                                 2 * this->dom_.first + this->centers_[i - 1]) /
                                 (sqrt(2) * this->dx_)) -
@@ -45,7 +48,7 @@ BasisFunc::BasisFunc(pair<double, double> dom, int nbasis, double w, bool kernel
         double result = 0.0;
         for(int k = -1; k <= 1; ++k) {
           for(int l = -1; l <= 1; ++l) {
-            result += this->dx_ / 2 * exp(-pow((this->dom_.first -
+            result += this->dx_ * 0.5 * exp(-pow((this->dom_.first -
                       this->dom_.second) * (k - l) + this->centers_[i - 1] -
                       this->centers_[j - 1], 2) / (4 * pow(this->dx_, 2))) *
                       sqrt(M_PI) * (erf((this->dom_.first * (k + l - 2) -
@@ -65,15 +68,20 @@ BasisFunc::BasisFunc(pair<double, double> dom, int nbasis, double w, bool kernel
 
 // Orthonormal Fourier basis (not convolved):
 //   pos=1: phi_1(x) = 1/sqrt(2L)
-//   pos even: phi_pos(x) = 1/sqrt(L) * cos(pi*(x-shift)*(pos/2)/L)
-//   pos odd:  phi_pos(x) = 1/sqrt(L) * sin(pi*(x-shift)*(pos/2)/L)
+//   pos even: phi_pos(x) = 1/sqrt(L) * cos(pi*(x-shift)*k/L),  k = pos/2
+//   pos odd:  phi_pos(x) = 1/sqrt(L) * sin(pi*(x-shift)*k/L),  k = pos/2
+// k = floor(0.5*pos) intentionally pairs even and odd indices to the same harmonic:
+// phi_2 and phi_3 both use k=1, phi_4 and phi_5 use k=2, etc.
 double BasisFunc::fourier(double x, int pos) const {
   if(pos == 1) {
-    return 1 / sqrt(2 * this->L_);
-  } else if(pos % 2 == 0) {
-    return sqrt(1 / this->L_) * cos(M_PI * (x - this->shift_) * (pos / 2) / this->L_);
+    return this->inv_sqrt_2L_;
+  }
+  double k = floor(0.5 * pos);
+  double phase = M_PI * k * this->inv_L_ * (x - this->shift_);
+  if(pos % 2 == 0) {
+    return this->sqrt_inv_L_ * cos(phase);
   } else {
-    return sqrt(1 / this->L_) * sin(M_PI * (x - this->shift_) * (pos / 2) / this->L_);
+    return this->sqrt_inv_L_ * sin(phase);
   }
 }
 
@@ -96,10 +104,16 @@ double BasisFunc::gaussian(double x, int pos) const {
 double BasisFunc::fourierd(double x, int pos) const {
   if(pos == 1) {
     return 0.0;
-  } else if(pos % 2 == 0) {
-    return -pow(1 / this->L_, 1.5) * M_PI * (pos / 2) * sin(M_PI * (x - this->shift_) * (pos / 2) / this->L_);
+  }
+  // k is the same harmonic index as in fourier()
+  double k = floor(0.5 * pos);
+  // pi*k/L^(3/2) = pi*k * inv_L_ * sqrt_inv_L_, precomputed to avoid pow()
+  double coeff = M_PI * k * this->inv_L_ * this->sqrt_inv_L_;
+  double phase = M_PI * k * this->inv_L_ * (x - this->shift_);
+  if(pos % 2 == 0) {
+    return -coeff * sin(phase);
   } else {
-    return pow(1 / this->L_, 1.5) * M_PI * (pos / 2) * cos(M_PI * (x - this->shift_) * (pos / 2) / this->L_);
+    return  coeff * cos(phase);
   }
 }
 
@@ -145,11 +159,12 @@ double BasisFunc::operator()(double x, int pos, bool conv) const {
   } else {
     if(conv) {
       if(pos == 1) {
-        return 1 / sqrt(2 * this->L_);
+        return this->inv_sqrt_2L_;  // constant, unaffected by convolution
       } else {
         // Fourier convolution theorem: multiplying by exp(-(pi*w*k)^2/(2L^2))
         // is equivalent to convolving phi_pos with a Gaussian of width w_
-        return exp(-pow(M_PI * this->w_ * (pos / 2), 2) / (2 * pow(this->L_, 2))) * fourier(x, pos);
+        double k = floor(0.5 * pos);
+        return exp(-0.5 * pow(M_PI * this->w_ * k * this->inv_L_, 2)) * fourier(x, pos);
       }
     } else {
       return fourier(x, pos);
@@ -182,7 +197,8 @@ double BasisFunc::grad(double x, int pos, bool conv) const {
       if(pos == 1) {
         return 0.0;
       } else {
-        return exp(-pow(M_PI * this->w_ * (pos / 2), 2) / (2 * pow(this->L_, 2))) * fourierd(x, pos);
+        double k = floor(0.5 * pos);
+        return exp(-0.5 * pow(M_PI * this->w_ * k * this->inv_L_, 2)) * fourierd(x, pos);
       }
     } else {
       return fourierd(x, pos);
@@ -197,7 +213,7 @@ double BasisFunc::int0(int pos) const {
     if(pos == 1) {
       return this->dom_.second - this->dom_.first;
     } else {
-      return -this->dx_ * sqrt(M_PI / 2) * (erf((2 * this->dom_.first -
+      return -this->dx_ * sqrt(M_PI * 0.5) * (erf((2 * this->dom_.first -
              this->dom_.second - this->centers_[pos - 2]) /
              (sqrt(2) * this->dx_)) + erf((this->dom_.first -
              2 * this->dom_.second + this->centers_[pos - 2]) /
@@ -207,7 +223,8 @@ double BasisFunc::int0(int pos) const {
     if(pos == 1) {
       return sqrt(2 * this->L_);
     } else if(pos % 2 == 0) {
-      return 2 * sqrt(this->L_) / (M_PI * (pos / 2)) * sin(M_PI * (pos / 2));
+      double k = floor(0.5 * pos);
+      return 2 * sqrt(this->L_) / (M_PI * k) * sin(M_PI * k);
     } else {
       return 0.0;
     }
@@ -219,7 +236,7 @@ double BasisFunc::int0(int pos) const {
 double BasisFunc::int1(int pos) const {
   if(this->kernel_) {
     if(pos == 1) {
-      return (pow(this->dom_.second, 2) - pow(this->dom_.first, 2)) / 2;
+      return (pow(this->dom_.second, 2) - pow(this->dom_.first, 2)) * 0.5;
     } else {
       double a = this->dom_.first;
       double b = this->dom_.second;
@@ -233,15 +250,17 @@ double BasisFunc::int1(int pos) const {
       double term4 = sqrt2pi * (c * erf((-a + c) / (sqrt(2) * this->dx_)) -
                       (a - b + c) * erf((a - 2 * b + c) / (sqrt(2) * this->dx_)) +
                       (a - b) * erf((-b + c) / (sqrt(2) * this->dx_)));
-      return this->dx_ / 2 * (2 * this->dx_ * term1 + term2 + term3 + term4);
+      return this->dx_ * 0.5 * (2 * this->dx_ * term1 + term2 + term3 + term4);
     }
   } else {
     if(pos == 1) {
       return this->shift_ * sqrt(2 * this->L_);
     } else if(pos % 2 == 0) {
-      return 2 * this->shift_ * sqrt(this->L_) / (M_PI * (pos / 2)) * sin(M_PI * (pos / 2));
+      double k = floor(0.5 * pos);
+      return 2 * this->shift_ * sqrt(this->L_) / (M_PI * k) * sin(M_PI * k);
     } else {
-      return 2 * pow(this->L_, 1.5) / pow(M_PI * (pos / 2), 2) * (sin(M_PI * (pos / 2)) - M_PI * (pos / 2) * cos(M_PI * (pos / 2)));
+      double k = floor(0.5 * pos);
+      return 2 * pow(this->L_, 1.5) / pow(M_PI * k, 2) * (sin(M_PI * k) - M_PI * k * cos(M_PI * k));
     }
   }
 }
@@ -281,18 +300,20 @@ double BasisFunc::int2(int pos) const {
           c_sq * erf3 - diff2_sq * erf4 +
           (a - b) * (a - b + 2 * c) * erf5
       );
-      return this->dx_ / 2 * (term1 + term2 + term3);
+      return this->dx_ * 0.5 * (term1 + term2 + term3);
     }
   } else {
     if(pos == 1) {
       return sqrt(2 * this->L_) / 3 * (3 * pow(this->shift_, 2) + pow(this->L_, 2));
     } else if(pos % 2 == 0) {
-      return 2 * sqrt(this->L_) / pow(M_PI * (pos / 2), 3) *
-            (2 * pow(this->L_, 2) * M_PI * (pos / 2) * cos(M_PI * (pos / 2)) +
-            (pow(M_PI * (pos / 2), 2) * (pow(this->shift_, 2) +
-            pow(this->L_, 2)) - 2 * pow(this->L_, 2)) * sin(M_PI * (pos / 2)));
+      double k = floor(0.5 * pos);
+      return 2 * sqrt(this->L_) / pow(M_PI * k, 3) *
+            (2 * pow(this->L_, 2) * M_PI * k * cos(M_PI * k) +
+            (pow(M_PI * k, 2) * (pow(this->shift_, 2) +
+            pow(this->L_, 2)) - 2 * pow(this->L_, 2)) * sin(M_PI * k));
     } else {
-      return 4 * this->shift_ * pow(this->L_, 1.5) / pow(M_PI * (pos / 2), 2) * (sin(M_PI * (pos / 2)) - M_PI * (pos / 2) * cos(M_PI * (pos / 2)));
+      double k = floor(0.5 * pos);
+      return 4 * this->shift_ * pow(this->L_, 1.5) / pow(M_PI * k, 2) * (sin(M_PI * k) - M_PI * k * cos(M_PI * k));
     }
   }
 }
diff --git a/src/ttsketch/BasisFunc.h b/src/ttsketch/BasisFunc.h
index 68559a87d2..5231a1bec1 100644
--- a/src/ttsketch/BasisFunc.h
+++ b/src/ttsketch/BasisFunc.h
@@ -30,6 +30,9 @@ class BasisFunc {
   int nbasis_;                    // number of basis functions n (including constant phi_1)
   double L_;                      // domain half-length: L = (b-a)/2
   double shift_;                  // domain midpoint: shift = (a+b)/2
+  double inv_L_;                  // 1/L, precomputed to avoid repeated division in Fourier evaluations
+  double sqrt_inv_L_;             // 1/sqrt(L), precomputed to avoid repeated pow() in Fourier evaluations
+  double inv_sqrt_2L_;            // 1/sqrt(2L), the value of phi_1(x) for the Fourier basis
   double w_;                      // smoothing width for conv mode (Fourier: sigma in x-units; kernel: Gaussian hill width)
   bool kernel_;                   // true = kernel basis, false = Fourier basis
   double dx_;                     // kernel basis function width (std dev of each periodized Gaussian)

From 3476490b322e8eb4d7f754b1b6c46a1d247c4f41 Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Wed, 1 Apr 2026 08:25:25 -0500
Subject: [PATCH 03/26] no namespace std

---
 src/ttsketch/BasisFunc.cpp |   8 +-
 src/ttsketch/TTHelper.cpp  |  41 +++++-----
 src/ttsketch/TTMetaD.cpp   | 159 ++++++++++++++++++-------------------
 3 files changed, 102 insertions(+), 106 deletions(-)

diff --git a/src/ttsketch/BasisFunc.cpp b/src/ttsketch/BasisFunc.cpp
index 36daf6ded4..39c837fe1c 100644
--- a/src/ttsketch/BasisFunc.cpp
+++ b/src/ttsketch/BasisFunc.cpp
@@ -1,14 +1,12 @@
 #include "BasisFunc.h"
 
-using namespace std;
-
 namespace PLMD {
 namespace ttsketch {
 
 BasisFunc::BasisFunc()
-  : dom_(make_pair(0.0, 0.0)), nbasis_(0), L_(0.0), shift_(0.0), inv_L_(0.0), sqrt_inv_L_(0.0), inv_sqrt_2L_(0.0), w_(0.0), kernel_(false), dx_(0.0) {}
+  : dom_(std::make_pair(0.0, 0.0)), nbasis_(0), L_(0.0), shift_(0.0), inv_L_(0.0), sqrt_inv_L_(0.0), inv_sqrt_2L_(0.0), w_(0.0), kernel_(false), dx_(0.0) {}
 
-BasisFunc::BasisFunc(pair<double, double> dom, int nbasis, double w, bool kernel, double dx)
+BasisFunc::BasisFunc(std::pair<double, double> dom, int nbasis, double w, bool kernel, double dx)
   : dom_(dom), nbasis_(nbasis), L_((dom.second - dom.first) * 0.5), shift_((dom.second + dom.first) * 0.5),
     inv_L_(2.0 / (dom.second - dom.first)), sqrt_inv_L_(sqrt(2.0 / (dom.second - dom.first))),
     inv_sqrt_2L_(sqrt(1.0 / (dom.second - dom.first))),
@@ -21,7 +19,7 @@ BasisFunc::BasisFunc(pair<double, double> dom, int nbasis, double w, bool kernel
       this->dx_ = spacing;
     }
     // nbasis-1 kernel centers, uniformly spaced starting at dom.first
-    this->centers_ = vector<double>(nbasis - 1);
+    this->centers_ = std::vector<double>(nbasis - 1);
     for(int i = 0; i < nbasis - 1; ++i) {
       this->centers_[i] = dom.first + i * spacing;
     }
diff --git a/src/ttsketch/TTHelper.cpp b/src/ttsketch/TTHelper.cpp
index 8894207c3a..62210e3794 100644
--- a/src/ttsketch/TTHelper.cpp
+++ b/src/ttsketch/TTHelper.cpp
@@ -1,41 +1,40 @@
 #include "TTHelper.h"
 
-using namespace std;
 using namespace itensor;
 
 namespace PLMD {
 namespace ttsketch {
 
-void ttWrite(const string& filename, const MPS& tt, unsigned count) {
+void ttWrite(const std::string& filename, const MPS& tt, unsigned count) {
   // count starts at 2 on the first sketch call; open for writing on first call, append thereafter
   auto f = count == 2 ? h5_open(filename, 'w') : h5_open(filename, 'a');
-  h5_write(f, "tt_" + to_string(count - 1), tt);
+  h5_write(f, "tt_" + std::to_string(count - 1), tt);
 }
 
-MPS ttRead(const string& filename, unsigned count) {
+MPS ttRead(const std::string& filename, unsigned count) {
   auto f = h5_open(filename, 'r');
-  auto tt = h5_read<MPS>(f, "tt_" + to_string(count - 1));
+  auto tt = h5_read<MPS>(f, "tt_" + std::to_string(count - 1));
   return tt;
 }
 
-void ttSumWrite(const string& filename, const MPS& tt, unsigned count) {
+void ttSumWrite(const std::string& filename, const MPS& tt, unsigned count) {
   auto f = h5_open(filename, 'a');
-  h5_write(f, "vb_" + to_string(count - 1), tt);
+  h5_write(f, "vb_" + std::to_string(count - 1), tt);
 }
 
-MPS ttSumRead(const string& filename, unsigned count) {
+MPS ttSumRead(const std::string& filename, unsigned count) {
   auto f = h5_open(filename, 'r');
-  auto tt = h5_read<MPS>(f, "vb_" + to_string(count - 1));
+  auto tt = h5_read<MPS>(f, "vb_" + std::to_string(count - 1));
   return tt;
 }
 
 // Evaluate TT by contracting each core with its 1D basis vector phi(x_i):
 //   result = sum_{i_1,...,i_d} G_1[i_1] * ... * G_d[i_d] * phi_1(x_1,i_1) * ... * phi_d(x_d,i_d)
 // The contraction is performed left-to-right so intermediate results stay rank-1 scalars.
-double ttEval(const MPS& tt, const vector<BasisFunc>& basis, const vector<double>& elements, bool conv) {
+double ttEval(const MPS& tt, const std::vector<BasisFunc>& basis, const std::vector<double>& elements, bool conv) {
   int d = length(tt);
   auto s = siteInds(tt);
-  vector<ITensor> basis_evals(d);
+  std::vector<ITensor> basis_evals(d);
   for(int i = 1; i <= d; ++i) {
     basis_evals[i - 1] = ITensor(s(i));
     for(int j = 1; j <= dim(s(i)); ++j) {
@@ -52,11 +51,11 @@ double ttEval(const MPS& tt, const vector<BasisFunc>& basis, const vector<double
 // Gradient of ttEval w.r.t. elements. For each dimension k, one TT contraction is performed
 // with the k-th basis vector replaced by its derivative d phi_k/dx_k (chain rule).
 // All basis evaluations are precomputed to avoid redundant work across the d contractions.
-vector<double> ttGrad(const MPS& tt, const vector<BasisFunc>& basis, const vector<double>& elements, bool conv) {
+std::vector<double> ttGrad(const MPS& tt, const std::vector<BasisFunc>& basis, const std::vector<double>& elements, bool conv) {
   int d = length(tt);
   auto s = siteInds(tt);
-  vector<double> grad(d, 0.0);
-  vector<ITensor> basis_evals(d), basisd_evals(d);
+  std::vector<double> grad(d, 0.0);
+  std::vector<ITensor> basis_evals(d), basisd_evals(d);
   for(int i = 1; i <= d; ++i) {
     basis_evals[i - 1] = basisd_evals[i - 1] = ITensor(s(i));
     for(int j = 1; j <= dim(s(i)); ++j) {
@@ -78,11 +77,11 @@ vector<double> ttGrad(const MPS& tt, const vector<BasisFunc>& basis, const vecto
 // Compute covariance matrix, marginal means, and partition function of the TT distribution.
 // Precomputes ITensors for the three moment integrals int0/int1/int2 per dimension,
 // then evaluates expectations as TT contractions with these integral vectors.
-tuple<Matrix<double>, vector<double>, double> covMat(const MPS& tt, const vector<BasisFunc>& basis) {
+std::tuple<Matrix<double>, std::vector<double>, double> covMat(const MPS& tt, const std::vector<BasisFunc>& basis) {
   int d = length(tt);
   auto s = siteInds(tt);
   // integral vectors: int0[i][j] = int phi_j(x_i) dx_i, etc.
-  vector<ITensor> basis_int0(d), basis_int1(d), basis_int2(d);
+  std::vector<ITensor> basis_int0(d), basis_int1(d), basis_int2(d);
   for(int i = 1; i <= d; ++i) {
     basis_int0[i - 1] = basis_int1[i - 1] = basis_int2[i - 1] = ITensor(s(i));
     for(int j = 1; j <= dim(s(i)); ++j) {
@@ -99,8 +98,8 @@ tuple<Matrix<double>, vector<double>, double> covMat(const MPS& tt, const vector
   auto rho = tt;
   rho /= elt(Z);
   // ei[k]  = E[x_k],  eii[k] = E[x_k^2],  eij[k][l] = E[x_k * x_l] for k < l
-  vector<double> ei(d), eii(d);
-  vector<vector<double>> eij(d, vector<double>(d));
+  std::vector<double> ei(d), eii(d);
+  std::vector<std::vector<double>> eij(d, std::vector<double>(d));
   for(int k = 1; k <= d; ++k) {
     // replace dimension k's integral vector with int1 (resp. int2) to get E[x_k] (resp. E[x_k^2])
     auto eival = rho(1) * (k == 1 ? basis_int1[0] : basis_int0[0]);
@@ -127,17 +126,17 @@ tuple<Matrix<double>, vector<double>, double> covMat(const MPS& tt, const vector
       sigma(k - 1, l - 1) = sigma(l - 1, k - 1) = k == l ? eii[k - 1] - pow(ei[k - 1], 2) : eij[k - 1][l - 1] - ei[k - 1] * ei[l - 1];
     }
   }
-  return make_tuple(sigma, ei, elt(Z));
+  return std::make_tuple(sigma, ei, elt(Z));
 }
 
 // Compute the 2D marginal density of the normalized TT distribution for dimensions
 // pos1 and pos2 on a (bins x bins) grid. All other dimensions are integrated out
 // analytically using int0, which collapses those TT cores to scalar factors.
-void marginal2d(const MPS& tt, const vector<BasisFunc>& basis, int pos1, int pos2, vector<vector<double>>& grid, bool conv) {
+void marginal2d(const MPS& tt, const std::vector<BasisFunc>& basis, int pos1, int pos2, std::vector<std::vector<double>>& grid, bool conv) {
   int bins = grid.size();
   int d = length(tt);
   auto s = siteInds(tt);
-  vector<ITensor> basis_int0(d);
+  std::vector<ITensor> basis_int0(d);
   for(int i = 1; i <= d; ++i) {
     basis_int0[i - 1] = ITensor(s(i));
     for(int j = 1; j <= dim(s(i)); ++j) {
diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index b9babd2f6f..d5def036aa 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -18,7 +18,6 @@
 #include "tools/File.h"
 #include "tools/OpenMP.h"
 
-using namespace std;
 using namespace itensor;
 using namespace PLMD::bias;
 
@@ -31,10 +30,10 @@ class TTMetaD : public Bias {
   struct Gaussian {
     bool multivariate;
     double height;
-    vector<double> center;
-    vector<double> sigma;
-    vector<double> invsigma;
-    Gaussian(const double h, const vector<double>& c, const vector<double>& s)
+    std::vector<double> center;
+    std::vector<double> sigma;
+    std::vector<double> invsigma;
+    Gaussian(const double h, const std::vector<double>& c, const std::vector<double>& s)
       : height(h), center(c), sigma(s), invsigma(s) {
       for(unsigned i = 0; i < invsigma.size(); ++i) {
         if(abs(invsigma[i]) > 1.e-20) {
@@ -50,14 +49,14 @@ class TTMetaD : public Bias {
   int stride_;
   bool welltemp_;
   double biasf_;
-  string fmt_;
+  std::string fmt_;
   bool isFirstStep_;
   double height0_;
-  vector<double> sigma0_;
-  vector<Gaussian> hills_;   // Gaussians deposited since the last TT update
+  std::vector<double> sigma0_;
+  std::vector<Gaussian> hills_;   // Gaussians deposited since the last TT update
   OFile hillsOfile_;
-  string mw_dir_;
-  string hillsfname_;
+  std::string mw_dir_;
+  std::string hillsfname_;
   bool walkers_mpi_;
   int mpi_size_;
   int mpi_rank_;
@@ -68,7 +67,7 @@ class TTMetaD : public Bias {
   double sketch_cutoff_;     // relative SVD truncation threshold (used when sketch_r_=0)
   int sketch_stride_;        // steps between TT bias updates (tau in the paper)
   double sketch_alpha_;      // weight for non-constant basis functions in the sketch coefficient TT
-  vector<BasisFunc> sketch_basis_;  // one BasisFunc per CV dimension
+  std::vector<BasisFunc> sketch_basis_;  // one BasisFunc per CV dimension
   unsigned sketch_count_;    // number of TT updates performed so far (starts at 1, increments after each update)
   MPS vb_;                   // current TT approximation of the accumulated bias (V_bias^TT)
   double sketch_until_;      // simulation time after which the bias is frozen (no further updates)
@@ -78,21 +77,21 @@ class TTMetaD : public Bias {
                              // instead of re-projecting the previous vb_ through the Gram matrix
   bool deterministic_;       // if true, use a fixed RNG seed (42) in createTTCoeff() for reproducibility
   MPS B_prev_;               // accumulated tensor moment from all previous sketches (nonintrusive only)
-  vector<ITensor> A_prev_;   // accumulated cross-gram matrices per bond (nonintrusive only)
+  std::vector<ITensor> A_prev_;   // accumulated cross-gram matrices per bond (nonintrusive only)
 
   void readGaussians(IFile *ifile);
   void writeGaussian(const Gaussian& hill, OFile& file);
-  double getHeight(const vector<double>& cv);
-  double getBias(const vector<double>& cv);
-  double getBiasAndDerivatives(const vector<double>& cv, vector<double>& der);
-  double evaluateGaussian(const vector<double>& cv, const Gaussian& hill);
-  double evaluateGaussianAndDerivatives(const vector<double>& cv, const Gaussian& hill, vector<double>& der, vector<double>& dp);
-  bool scanOneHill(IFile* ifile, vector<Value>& tmpvalues, vector<double>& center, vector<double>& sigma, double& height);
+  double getHeight(const std::vector<double>& cv);
+  double getBias(const std::vector<double>& cv);
+  double getBiasAndDerivatives(const std::vector<double>& cv, std::vector<double>& der);
+  double evaluateGaussian(const std::vector<double>& cv, const Gaussian& hill);
+  double evaluateGaussianAndDerivatives(const std::vector<double>& cv, const Gaussian& hill, std::vector<double>& der, std::vector<double>& dp);
+  bool scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height);
   void paraSketch();
   MPS createTTCoeff() const;
-  pair<vector<ITensor>, IndexSet> intBasisSample(const IndexSet& is) const;
-  tuple<MPS, vector<ITensor>, vector<ITensor>> formTensorMoment(const vector<ITensor>& M, const MPS& coeff, const IndexSet& is);
-  tuple<MPS, vector<ITensor>, vector<ITensor>> formTensorMomentVb(const MPS& coeff);
+  std::pair<std::vector<ITensor>, IndexSet> intBasisSample(const IndexSet& is) const;
+  std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> formTensorMoment(const std::vector<ITensor>& M, const MPS& coeff, const IndexSet& is);
+  std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> formTensorMomentVb(const MPS& coeff);
 
 public:
   explicit TTMetaD(const ActionOptions&);
@@ -139,7 +138,7 @@ TTMetaD::TTMetaD(const ActionOptions& ao):
   welltemp_(false),
   biasf_(-1.0),
   isFirstStep_(true),
-  height0_(numeric_limits<double>::max()),
+  height0_(std::numeric_limits<double>::max()),
   mw_dir_(""),
   walkers_mpi_(false),
   mpi_size_(0),
@@ -147,7 +146,7 @@ TTMetaD::TTMetaD(const ActionOptions& ao):
   sketch_r_(0),
   sketch_cutoff_(0.0),
   sketch_count_(1),
-  sketch_until_(numeric_limits<double>::max()),
+  sketch_until_(std::numeric_limits<double>::max()),
   frozen_(false),
   sketch_conv_(false),
   nonintrusive_(false),
@@ -186,7 +185,7 @@ TTMetaD::TTMetaD(const ActionOptions& ao):
   parseFlag("WALKERS_MPI", this->walkers_mpi_);
   parse("WALKERS_DIR", this->mw_dir_);
   if(this->walkers_mpi_ && this->mw_dir_== "") {
-    const string ret = filesystem::current_path();
+    const std::string ret = std::filesystem::current_path();
     this->mw_dir_ = ret + "/";
     multi_sim_comm.Bcast(this->mw_dir_, 0);
   }
@@ -207,12 +206,12 @@ TTMetaD::TTMetaD(const ActionOptions& ao):
   if(this->sketch_stride_ <= 0) {
     error("SKETCH_PACE must be positive");
   }
-  vector<double> interval_min;
+  std::vector<double> interval_min;
   parseVector("INTERVAL_MIN", interval_min);
   if(interval_min.size() != this->d_) {
     error("Number of arguments does not match number of INTERVAL_MIN parameters");
   }
-  vector<double> interval_max;
+  std::vector<double> interval_max;
   parseVector("INTERVAL_MAX", interval_max);
   if(interval_max.size() != this->d_) {
     error("Number of arguments does not match number of INTERVAL_MAX parameters");
@@ -229,7 +228,7 @@ TTMetaD::TTMetaD(const ActionOptions& ao):
   if(this->sketch_alpha_ <= 0.0 || this->sketch_alpha_ > 1.0) {
     error("SKETCH_ALPHA must be positive and no greater than 1");
   }
-  vector<double> w;
+  std::vector<double> w;
   parseVector("SKETCH_WIDTH", w);
   if(w.size() == 0) {
     w.assign(this->d_, 0.0);
@@ -242,7 +241,7 @@ TTMetaD::TTMetaD(const ActionOptions& ao):
       this->sketch_conv_ = true;
     }
   }
-  vector<double> dx;
+  std::vector<double> dx;
   parseVector("KERNEL_DX", dx);
   if(dx.size() == 0) {
     dx.resize(this->d_, 0.0);
@@ -260,7 +259,7 @@ TTMetaD::TTMetaD(const ActionOptions& ao):
     if(interval_max[i] <= interval_min[i]) {
       error("INTERVAL_MAX parameters need to be greater than respective INTERVAL_MIN parameters");
     }
-    this->sketch_basis_.push_back(BasisFunc(make_pair(interval_min[i], interval_max[i]), nbasis, w[i], kernel, dx[i]));
+    this->sketch_basis_.push_back(BasisFunc(std::make_pair(interval_min[i], interval_max[i]), nbasis, w[i], kernel, dx[i]));
   }
   if(kernel && this->sketch_conv_) {
     error("kernel smoothing incompatible with kernel basis");
@@ -276,7 +275,7 @@ TTMetaD::TTMetaD(const ActionOptions& ao):
   parseFlag("DETERMINISTIC", this->deterministic_);
 
   if(getRestart()) {
-    string ttfilename = "ttsketch.h5";
+    std::string ttfilename = "ttsketch.h5";
     if(this->walkers_mpi_) {
       ttfilename = "../" + ttfilename;
     }
@@ -319,12 +318,12 @@ TTMetaD::TTMetaD(const ActionOptions& ao):
 }
 
 void TTMetaD::readGaussians(IFile *ifile) {
-  vector<double> center(this->d_);
-  vector<double> sigma(this->d_);
+  std::vector<double> center(this->d_);
+  std::vector<double> sigma(this->d_);
   double height;
   int nhills = 0;
 
-  vector<Value> tmpvalues;
+  std::vector<Value> tmpvalues;
   for(unsigned j = 0; j < this->d_; ++j) {
     tmpvalues.push_back(Value(this, getPntrToArgument(j)->getName(), false));
   }
@@ -340,7 +339,7 @@ void TTMetaD::readGaussians(IFile *ifile) {
   log << "  " << nhills << " hills retrieved\n";
 }
 
-bool TTMetaD::scanOneHill(IFile* ifile, vector<Value>& tmpvalues, vector<double>& center, vector<double>& sigma, double& height) {
+bool TTMetaD::scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height) {
   double dummy;
   if(ifile->scanField("time", dummy)) {
     unsigned ncv = tmpvalues.size();
@@ -349,9 +348,9 @@ bool TTMetaD::scanOneHill(IFile* ifile, vector<Value>& tmpvalues, vector<double>
       if(tmpvalues[i].isPeriodic() && !getPntrToArgument(i)->isPeriodic()) {
         error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
       } else if(tmpvalues[i].isPeriodic()) {
-        string imin, imax;
+        std::string imin, imax;
         tmpvalues[i].getDomain(imin, imax);
-        string rmin, rmax;
+        std::string rmin, rmax;
         getPntrToArgument(i)->getDomain(rmin, rmax);
         if(imin != rmin || imax != rmax) {
           error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
@@ -390,12 +389,12 @@ void TTMetaD::writeGaussian(const Gaussian& hill, OFile&file) {
 
 // Called every MD step: evaluate total bias (TT + remaining Gaussians) and set forces.
 void TTMetaD::calculate() {
-  vector<double> cv(this->d_);
+  std::vector<double> cv(this->d_);
   for(unsigned i = 0; i < this->d_; ++i) {
     cv[i] = getArgument(i);
   }
 
-  vector<double> der(this->d_, 0.0);
+  std::vector<double> der(this->d_, 0.0);
 
   double ene = getBiasAndDerivatives(cv, der);
   setBias(ene);
@@ -439,7 +438,7 @@ void TTMetaD::update() {
 
       log << "\nEmpirical means:\n";
       Matrix<double> sigmahat(this->d_, this->d_);
-      vector<double> muhat(this->d_, 0.0);
+      std::vector<double> muhat(this->d_, 0.0);
       for(unsigned k = 0; k < this->d_; ++k) {
         for(unsigned j = 0; j < N; ++j) {
           muhat[k] += this->hills_[j].center[k] / N;
@@ -459,8 +458,8 @@ void TTMetaD::update() {
       matrixOut(log, sigmahat);
 
       // record current bias at hill centers before update, for computing relative error after
-      vector<double> A0(N);
-      vector<vector<double>> x(N);
+      std::vector<double> A0(N);
+      std::vector<std::vector<double>> x(N);
       for(unsigned i = 0; i < N; ++i) {
         x[i] = this->hills_[i].center;
         A0[i] = getBias(x[i]);
@@ -474,15 +473,15 @@ void TTMetaD::update() {
       this->hills_.clear();
 
       // compute relative L2 approximation error at hill centers: ||V_new - V_old|| / ||V_old||
-      vector<double> diff(N);
+      std::vector<double> diff(N);
       for(unsigned i = 0; i < N; ++i) {
         diff[i] = getBias(x[i]);
       }
-      transform(diff.begin(), diff.end(), A0.begin(), diff.begin(), minus<double>());
+      std::transform(diff.begin(), diff.end(), A0.begin(), diff.begin(), std::minus<double>());
       log << "Relative l2 error = " << sqrt(norm(diff) / norm(A0)) << "\n\n";
       log.flush();
 
-      string ttfilename = "ttsketch.h5";
+      std::string ttfilename = "ttsketch.h5";
       if(this->walkers_mpi_) {
         ttfilename = "../" + ttfilename;
       }
@@ -519,7 +518,7 @@ void TTMetaD::update() {
     this->isFirstStep_ = false;
   }
 
-  vector<double> cv(this->d_);
+  std::vector<double> cv(this->d_);
   for(unsigned i = 0; i < this->d_; ++i) {
     cv[i] = getArgument(i);
   }
@@ -528,16 +527,16 @@ void TTMetaD::update() {
     double height = getHeight(cv);
 
     if(this->walkers_mpi_) {
-      vector<double> all_cv(this->mpi_size_ * this->d_, 0.0);
-      vector<double> all_sigma(this->mpi_size_ * this->sigma0_.size(), 0.0);
-      vector<double> all_height(this->mpi_size_, 0.0);
+      std::vector<double> all_cv(this->mpi_size_ * this->d_, 0.0);
+      std::vector<double> all_sigma(this->mpi_size_ * this->sigma0_.size(), 0.0);
+      std::vector<double> all_height(this->mpi_size_, 0.0);
       multi_sim_comm.Allgather(cv, all_cv);
       multi_sim_comm.Allgather(this->sigma0_, all_sigma);
       multi_sim_comm.Allgather(height * (this->biasf_ > 1.0 ? this->biasf_ / (this->biasf_ - 1.0) : 1.0), all_height);
 
       for(int i = 0; i < this->mpi_size_; i++) {
-        vector<double> cv_now(this->d_);
-        vector<double> sigma_now(this->sigma0_.size());
+        std::vector<double> cv_now(this->d_);
+        std::vector<double> sigma_now(this->sigma0_.size());
         for(unsigned j = 0; j < this->d_; j++) {
           cv_now[j] = all_cv[i * this->d_ + j];
         }
@@ -564,7 +563,7 @@ void TTMetaD::update() {
   }
 }
 
-double TTMetaD::getHeight(const vector<double>& cv) {
+double TTMetaD::getHeight(const std::vector<double>& cv) {
   double height = this->height0_;
   if(this->welltemp_) {
     double vbias = getBias(cv);
@@ -578,7 +577,7 @@ double TTMetaD::getHeight(const vector<double>& cv) {
 }
 
 // Total bias = TT approximation of accumulated hills + sum of recently deposited Gaussians.
-double TTMetaD::getBias(const vector<double>& cv) {
+double TTMetaD::getBias(const std::vector<double>& cv) {
   double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
   unsigned nt = OpenMP::getNumThreads();
   #pragma omp parallel num_threads(nt)
@@ -591,22 +590,22 @@ double TTMetaD::getBias(const vector<double>& cv) {
   return bias;
 }
 
-double TTMetaD::getBiasAndDerivatives(const vector<double>& cv, vector<double>& der) {
+double TTMetaD::getBiasAndDerivatives(const std::vector<double>& cv, std::vector<double>& der) {
   double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
   if(length(this->vb_) != 0) {
     der = ttGrad(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
   }
   unsigned nt = OpenMP::getNumThreads();
   if(this->hills_.size() < 2 * nt || nt == 1) {
-    vector<double> dp(this->d_);
+    std::vector<double> dp(this->d_);
     for(unsigned i = 0; i < this->hills_.size(); ++i) {
       bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], der, dp);
     }
   } else {
     #pragma omp parallel num_threads(nt)
     {
-      vector<double> omp_deriv(this->d_, 0.0);
-      vector<double> dp(this->d_);
+      std::vector<double> omp_deriv(this->d_, 0.0);
+      std::vector<double> dp(this->d_);
       #pragma omp for reduction(+:bias) nowait
       for(unsigned i = 0; i < this->hills_.size(); ++i) {
         bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], omp_deriv, dp);
@@ -620,7 +619,7 @@ double TTMetaD::getBiasAndDerivatives(const vector<double>& cv, vector<double>&
   return bias;
 }
 
-double TTMetaD::evaluateGaussian(const vector<double>& cv, const Gaussian& hill) {
+double TTMetaD::evaluateGaussian(const std::vector<double>& cv, const Gaussian& hill) {
   double dp2 = 0.0;
   for(unsigned i = 0; i < this->d_; i++) {
     double dp = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
@@ -636,7 +635,7 @@ double TTMetaD::evaluateGaussian(const vector<double>& cv, const Gaussian& hill)
   return bias;
 }
 
-double TTMetaD::evaluateGaussianAndDerivatives(const vector<double>& cv, const Gaussian& hill, vector<double>& der, vector<double>& dp) {
+double TTMetaD::evaluateGaussianAndDerivatives(const std::vector<double>& cv, const Gaussian& hill, std::vector<double>& der, std::vector<double>& dp) {
   double dp2 = 0.0;
   double bias = 0.0;
   for(unsigned i = 0; i < this->d_; i++) {
@@ -676,8 +675,8 @@ void TTMetaD::paraSketch() {
 
   auto [Bemp, envi_L, envi_R] = formTensorMoment(M, coeff, is);
   MPS Bemp_Vb;
-  vector<ITensor> envi_L_Vb;
-  vector<ITensor> envi_R_Vb;
+  std::vector<ITensor> envi_L_Vb;
+  std::vector<ITensor> envi_R_Vb;
   if(this->sketch_count_ != 1) {
     if(this->nonintrusive_) {
       // Non-intrusive variant: accumulate tensor moments directly across sketches.
@@ -730,9 +729,9 @@ void TTMetaD::paraSketch() {
         }
       }
       auto vbresult = formTensorMomentVb(coeff);
-      Bemp_Vb = get<0>(vbresult);
-      envi_L_Vb = get<1>(vbresult);
-      envi_R_Vb = get<2>(vbresult);
+      Bemp_Vb = std::get<0>(vbresult);
+      envi_L_Vb = std::get<1>(vbresult);
+      envi_R_Vb = std::get<2>(vbresult);
       for(unsigned i = 1; i <= this->d_; ++i) {
         Bemp.ref(i) += Bemp_Vb(i);
       }
@@ -748,8 +747,8 @@ void TTMetaD::paraSketch() {
   // where L = envi_L[k] and R = envi_R[k-2] are the left/right environment projections
   // of coeff onto the sample indices. SVD A to find the rank-trimmed subspace V.
   auto links = linkInds(coeff);
-  vector<ITensor> U(this->d_), S(this->d_), V(this->d_);
-  vector<Index> links_trimmed;
+  std::vector<ITensor> U(this->d_), S(this->d_), V(this->d_);
+  std::vector<Index> links_trimmed;
   for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
     int rank = dim(links(core_id - 1));
     Matrix<double> LMat(N, rank), RMat(N, rank);
@@ -873,8 +872,8 @@ void TTMetaD::paraSketch() {
 // full weight while all other harmonics are scaled down by sketch_alpha_. This
 // ensures the sketch captures the large constant component of the bias accurately.
 MPS TTMetaD::createTTCoeff() const {
-  default_random_engine generator(this->deterministic_ ? 42u : static_cast<unsigned int>(time(nullptr)));
-  normal_distribution<double> distribution(0.0, 1.0);
+  std::default_random_engine generator(this->deterministic_ ? 42u : static_cast<unsigned int>(time(nullptr)));
+  std::normal_distribution<double> distribution(0.0, 1.0);
   int n = this->sketch_basis_[0].nbasis();
   auto sites = SiteSet(this->d_, n);
   auto coeff = MPS(sites, this->sketch_rc_);
@@ -900,7 +899,7 @@ MPS TTMetaD::createTTCoeff() const {
   for(unsigned i = 1; i <= this->d_; ++i) {
     auto s = sites(i);
     auto sp = prime(s);
-    vector<double> Avec(n, this->sketch_alpha_);
+    std::vector<double> Avec(n, this->sketch_alpha_);
     Avec[0] = 1.0;
     auto A = diagITensor(Avec, s, sp);
     coeff.ref(i) *= A;
@@ -918,12 +917,12 @@ MPS TTMetaD::createTTCoeff() const {
 //   - Fourier basis: Fourier transform of the 1D Gaussian (exponential decay * harmonic)
 // Returns M as a vector of ITensors (one per dimension) and a new IndexSet is_new
 // where is_new(i) indexes the N sample points for dimension i.
-pair<vector<ITensor>, IndexSet> TTMetaD::intBasisSample(const IndexSet& is) const {
+std::pair<std::vector<ITensor>, IndexSet> TTMetaD::intBasisSample(const IndexSet& is) const {
   unsigned N = this->hills_.size();
   int nb = this->sketch_basis_[0].nbasis();
   auto sites_new = SiteSet(this->d_, N);
-  vector<ITensor> M;
-  vector<Index> is_new;
+  std::vector<ITensor> M;
+  std::vector<Index> is_new;
   for(unsigned i = 1; i <= this->d_; ++i) {
     double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
     double a = (this->sketch_basis_[i - 1].dom().second + this->sketch_basis_[i - 1].dom().first) / 2;
@@ -963,7 +962,7 @@ pair<vector<ITensor>, IndexSet> TTMetaD::intBasisSample(const IndexSet& is) cons
       }
     }
   }
-  return make_pair(M, IndexSet(is_new));
+  return std::make_pair(M, IndexSet(is_new));
 }
 
 // Compute the tensor moment B and environment tensors for the new Gaussian hills.
@@ -979,7 +978,7 @@ pair<vector<ITensor>, IndexSet> TTMetaD::intBasisSample(const IndexSet& is) cons
 // B[k] = sum_j envi_L[k-1][j, :] * envi_R[k-1][j, :] * M[k][:, basis_k],
 //   which is the "tensor moment" for core k: the sketch of the Gaussian sum projected
 //   onto the basis functions of dimension k and the sketch subspaces of all other dims.
-tuple<MPS, vector<ITensor>, vector<ITensor>> TTMetaD::formTensorMoment(const vector<ITensor>& M, const MPS& coeff, const IndexSet& is) {
+std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorMoment(const std::vector<ITensor>& M, const MPS& coeff, const IndexSet& is) {
   int N = dim(is(1));
   auto links = linkInds(coeff);
   // L[i] = coeff[i] with basis index contracted against M[i] -> sample index
@@ -991,7 +990,7 @@ tuple<MPS, vector<ITensor>, vector<ITensor>> TTMetaD::formTensorMoment(const vec
 
   // Build left environments: envi_L[i] accumulates the contraction of L(1)..L(i)
   // over the shared sample index, leaving the next sample index and link free.
-  vector<ITensor> envi_L(this->d_);
+  std::vector<ITensor> envi_L(this->d_);
   envi_L[1] = L(1) * delta(is(1), is(2));
   for(unsigned i = 2; i < this->d_; ++i) {
     int rankl = dim(links(i - 1));
@@ -1010,7 +1009,7 @@ tuple<MPS, vector<ITensor>, vector<ITensor>> TTMetaD::formTensorMoment(const vec
   }
 
   // Build right environments: envi_R[i] accumulates L(i+2)..L(d) right-to-left.
-  vector<ITensor> envi_R(this->d_);
+  std::vector<ITensor> envi_R(this->d_);
   envi_R[this->d_ - 2] = L(this->d_) * delta(is(this->d_), is(this->d_ - 1));
   for(int i = this->d_ - 3; i >= 0; --i) {
     int rankl = dim(links(i + 1));
@@ -1049,7 +1048,7 @@ tuple<MPS, vector<ITensor>, vector<ITensor>> TTMetaD::formTensorMoment(const vec
   }
   B.ref(this->d_) = envi_L[this->d_ - 1] * M[this->d_ - 1];
 
-  return make_tuple(B, envi_L, envi_R);
+  return std::make_tuple(B, envi_L, envi_R);
 }
 
 // Compute the tensor moment B and environments for the previous TT bias vb_.
@@ -1057,18 +1056,18 @@ tuple<MPS, vector<ITensor>, vector<ITensor>> TTMetaD::formTensorMoment(const vec
 // Analogous to formTensorMoment but uses vb_ in place of M*coeff: the "samples"
 // are the vb_ bond indices, and the environments are formed by contracting coeff
 // with vb_ core by core. The result has the same structure as Bemp from the new hills.
-tuple<MPS, vector<ITensor>, vector<ITensor>> TTMetaD::formTensorMomentVb(const MPS& coeff) {
+std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorMomentVb(const MPS& coeff) {
   // align vb_ site indices with coeff's site indices for contraction
   for(unsigned i = 1; i <= this->d_; ++i) {
     this->vb_.ref(i) *= delta(siteIndex(this->vb_, i), siteIndex(coeff, i));
   }
-  vector<ITensor> envi_L(this->d_);
+  std::vector<ITensor> envi_L(this->d_);
   envi_L[1] = coeff(1) * this->vb_(1);
   for(unsigned i = 2; i < this->d_; ++i) {
     envi_L[i] = envi_L[i - 1] * coeff(i) * this->vb_(i);
   }
 
-  vector<ITensor> envi_R(this->d_);
+  std::vector<ITensor> envi_R(this->d_);
   envi_R[this->d_ - 2] = coeff(this->d_) * this->vb_(this->d_);
   for(int i = this->d_ - 3; i >= 0; --i) {
     envi_R[i] = envi_R[i + 1] * coeff(i + 2) * this->vb_(i + 2);
@@ -1081,7 +1080,7 @@ tuple<MPS, vector<ITensor>, vector<ITensor>> TTMetaD::formTensorMomentVb(const M
   }
   B.ref(this->d_) = envi_L[this->d_ - 1] * this->vb_(this->d_);
 
-  return make_tuple(B, envi_L, envi_R);
+  return std::make_tuple(B, envi_L, envi_R);
 }
 
 }

From 676a5c1a5758947ce4e2ac00fc5670727c94f93a Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Wed, 1 Apr 2026 09:21:22 -0500
Subject: [PATCH 04/26] module files

---
 src/ttsketch/TTMetaD.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index d5def036aa..1c448b352c 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -2,10 +2,7 @@
  *
  * Tensor train (TT)-metadynamics.
  * https://arxiv.org/abs/2603.13549
- * Before compiling, install the ITensor library (https://github.com/ITensor/ITensor) and ensure that HDF5 support is available.
- * Copy the options.mk to the location of the ITensor installation and edit if needed.
- * Run the following command from the PLUMED source directory (edit libraries if needed to match your options.mk):
- * ./configure --enable-modules=ttsketch LDFLAGS=-L/path/to/ITensor/lib CPPFLAGS="-I/path/to/ITensor -DITENSOR_USE_HDF5 -DITENSOR_USE_OMP -D__ITENSOR_LAPACK_WRAP_h" LIBS="-litensor -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lhdf5 -lhdf5_hl" CXXFLAGS="-O3 -fconcepts"
+ * See module.md for installation instructions.
  */
 
 #include "TTHelper.h"

From 13d234a3c0f8a2a6b1bfbedf38da38cbe82bcab5 Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Wed, 1 Apr 2026 09:23:22 -0500
Subject: [PATCH 05/26] not ignore module files

---
 src/ttsketch/.gitignore |  2 +-
 src/ttsketch/module.md  | 54 +++++++++++++++++++++++++++++++++++++++++
 src/ttsketch/module.yml |  4 +++
 3 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 src/ttsketch/module.md
 create mode 100644 src/ttsketch/module.yml

diff --git a/src/ttsketch/.gitignore b/src/ttsketch/.gitignore
index efe518894f..aebbb246c1 100644
--- a/src/ttsketch/.gitignore
+++ b/src/ttsketch/.gitignore
@@ -6,6 +6,6 @@
 !/*.h
 !/Makefile
 !/README
-!/module.type
+!/module.*
 !/COPYRIGHT
 !/options.mk
diff --git a/src/ttsketch/module.md b/src/ttsketch/module.md
new file mode 100644
index 0000000000..8d4f6951e7
--- /dev/null
+++ b/src/ttsketch/module.md
@@ -0,0 +1,54 @@
+<!--
+description: Tensor train metadynamics (TT-MetaD)
+authors: Nils E. Strand, Siyao Yang, Yuehaw Khoo, Aaron R. Dinner
+reference: https://arxiv.org/abs/2603.13549
+-->
+
+# Overview
+
+This module implements tensor train metadynamics (TT-MetaD), a method that
+represents the metadynamics bias potential as a tensor train (TT) rather than
+as a sum of Gaussian hills. The accumulated Gaussian hills are periodically
+compressed into a low-rank TT approximation using the TT-Sketch algorithm,
+keeping memory and evaluation cost bounded throughout long simulations.
+
+For the full algorithmic details see the paper:
+<https://arxiv.org/abs/2603.13549>
+
+
+# Installation
+
+This module requires the [ITensor] library (v3) with HDF5 support.
+
+## Building ITensor
+
+Follow the [ITensor installation instructions]. Copy `options.mk` to the
+ITensor source directory and edit it to match your compiler and LAPACK/BLAS
+setup. Then build:
+
+```bash
+cd /path/to/ITensor
+cp options.mk.sample options.mk
+# edit options.mk as needed
+make
+```
+
+## Building PLUMED with ttsketch
+
+Once ITensor is built, configure PLUMED from its source directory. Adjust
+library paths and flags to match your `options.mk`:
+
+```bash
+./configure --enable-modules=ttsketch \
+    LDFLAGS="-L/path/to/ITensor/lib" \
+    CPPFLAGS="-I/path/to/ITensor -DITENSOR_USE_HDF5 -DITENSOR_USE_OMP -D__ITENSOR_LAPACK_WRAP_h" \
+    LIBS="-litensor -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lhdf5 -lhdf5_hl" \
+    CXXFLAGS="-O3 -fconcepts"
+```
+
+The flags above assume Intel MKL for LAPACK/BLAS. If you use OpenBLAS or
+another provider, replace the `-lmkl_*` flags accordingly.
+
+
+[ITensor]: https://github.com/ITensor/ITensor
+[ITensor installation instructions]: https://itensor.org/docs.cgi?page=install
diff --git a/src/ttsketch/module.yml b/src/ttsketch/module.yml
new file mode 100644
index 0000000000..9127ae88b1
--- /dev/null
+++ b/src/ttsketch/module.yml
@@ -0,0 +1,4 @@
+name: ttsketch
+authors: Nils E. Strand, Siyao Yang, Yuehaw Khoo, and Aaron R. Dinner
+description: TT-Metadynamics, a variant of metadynamics where the bias potential is periodically compressed into a tensor train (TT); useful for high-dimensional bias potentials (many CVs)
+dois: []

From 47667db4f308199f6d2d09f13f27a70e7df9ab77 Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Wed, 1 Apr 2026 09:35:05 -0500
Subject: [PATCH 06/26] citations

---
 src/ttsketch/TTMetaD.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index 1c448b352c..d5ae778515 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -334,6 +334,10 @@ void TTMetaD::readGaussians(IFile *ifile) {
   }
   log << "  restarting from step " << this->sketch_count_ << "\n";
   log << "  " << nhills << " hills retrieved\n";
+
+  log << "  Bibliography " << plumed.cite("Strand, Yang, Khoo, and Dinner, https://arxiv.org/abs/2603.13549 (2026)");
+  log << plumed.cite("Fishman, White, and Stoudenmire, https://arxiv.org/abs/2007.14822 (2020)");
+  log << "\n";
 }
 
 bool TTMetaD::scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height) {

From 6294c05d61657537baf79284c6010d98002658bf Mon Sep 17 00:00:00 2001
From: Nils Strand <nstrand@midway3-login3.rcc.local>
Date: Fri, 3 Apr 2026 09:40:55 -0500
Subject: [PATCH 07/26] astyle

---
 src/ttsketch/BasisFunc.cpp | 81 +++++++++++++++++++-------------------
 src/ttsketch/BasisFunc.h   | 32 +++++++++++----
 src/ttsketch/TTMetaD.cpp   |  7 ++--
 3 files changed, 67 insertions(+), 53 deletions(-)

diff --git a/src/ttsketch/BasisFunc.cpp b/src/ttsketch/BasisFunc.cpp
index 39c837fe1c..746dbb5682 100644
--- a/src/ttsketch/BasisFunc.cpp
+++ b/src/ttsketch/BasisFunc.cpp
@@ -10,8 +10,7 @@ BasisFunc::BasisFunc(std::pair<double, double> dom, int nbasis, double w, bool k
   : dom_(dom), nbasis_(nbasis), L_((dom.second - dom.first) * 0.5), shift_((dom.second + dom.first) * 0.5),
     inv_L_(2.0 / (dom.second - dom.first)), sqrt_inv_L_(sqrt(2.0 / (dom.second - dom.first))),
     inv_sqrt_2L_(sqrt(1.0 / (dom.second - dom.first))),
-    w_(w), kernel_(kernel), dx_(dx)
-{
+    w_(w), kernel_(kernel), dx_(dx) {
   if(kernel) {
     double spacing = (dom.second - dom.first) / (nbasis - 1);
     if(dx == 0) {
@@ -34,12 +33,12 @@ BasisFunc::BasisFunc(std::pair<double, double> dom, int nbasis, double w, bool k
     for(int i = 1; i < nbasis; ++i) {
       // G(0,i) = G(i,0) = int phi_{i+1}(x) dx (integral of a periodized Gaussian)
       this->gram_(i, 0) = this->gram_(0, i) = this->dx_ * sqrt(M_PI * 0.5) *
-                                (erf((this->dom_.second -
-                                2 * this->dom_.first + this->centers_[i - 1]) /
-                                (sqrt(2) * this->dx_)) -
-                                erf((this->dom_.first - 2 * this->dom_.second +
-                                this->centers_[i - 1]) /
-                                (sqrt(2) * this->dx_)));
+                                              (erf((this->dom_.second -
+                                                  2 * this->dom_.first + this->centers_[i - 1]) /
+                                                  (sqrt(2) * this->dx_)) -
+                                                  erf((this->dom_.first - 2 * this->dom_.second +
+                                                      this->centers_[i - 1]) /
+                                                      (sqrt(2) * this->dx_)));
       // G(i,j) = int phi_{i+1}(x)*phi_{j+1}(x) dx; the double sum over k,l
       // accounts for cross-terms between periodic images of each kernel.
       for(int j = i; j < nbasis; ++j) {
@@ -47,14 +46,14 @@ BasisFunc::BasisFunc(std::pair<double, double> dom, int nbasis, double w, bool k
         for(int k = -1; k <= 1; ++k) {
           for(int l = -1; l <= 1; ++l) {
             result += this->dx_ * 0.5 * exp(-pow((this->dom_.first -
-                      this->dom_.second) * (k - l) + this->centers_[i - 1] -
-                      this->centers_[j - 1], 2) / (4 * pow(this->dx_, 2))) *
+                                                  this->dom_.second) * (k - l) + this->centers_[i - 1] -
+                                                 this->centers_[j - 1], 2) / (4 * pow(this->dx_, 2))) *
                       sqrt(M_PI) * (erf((this->dom_.first * (k + l - 2) -
-                      this->dom_.second * (k + l) + this->centers_[i - 1] +
-                      this->centers_[j - 1]) / (2 * this->dx_)) -
-                      erf((this->dom_.first * (k + l) - this->dom_.second *
-                      (k + l + 2) + this->centers_[i - 1] +
-                      this->centers_[j - 1]) / (2 * this->dx_)));
+                                         this->dom_.second * (k + l) + this->centers_[i - 1] +
+                                         this->centers_[j - 1]) / (2 * this->dx_)) -
+                                    erf((this->dom_.first * (k + l) - this->dom_.second *
+                                         (k + l + 2) + this->centers_[i - 1] +
+                                         this->centers_[j - 1]) / (2 * this->dx_)));
           }
         }
         this->gram_(i, j) = this->gram_(j, i) = result;
@@ -123,7 +122,7 @@ double BasisFunc::gaussiand(double x, int pos) const {
     for(int k = -1; k <= 1; ++k) {
       result += (this->centers_[pos - 2] - x - 2 * k * this->L_) /
                 pow(this->dx_, 2) * exp(-pow(x - this->centers_[pos - 2] +
-                2 * k * this->L_, 2) / (2 * pow(this->dx_, 2)));
+                                             2 * k * this->L_, 2) / (2 * pow(this->dx_, 2)));
     }
     return result;
   }
@@ -145,9 +144,9 @@ double BasisFunc::operator()(double x, int pos, bool conv) const {
         double result = 0.0;
         for(int k = -1; k <= 1; ++k) {
           result += exp(-pow(x - this->centers_[pos - 2] +
-                    2 * k * this->L_, 2) / (2 * (pow(this->dx_, 2) +
-                    pow(this->w_, 2)))) / (sqrt(1 / pow(this->dx_, 2) + 1 /
-                    pow(this->w_, 2)) * this->w_);
+                             2 * k * this->L_, 2) / (2 * (pow(this->dx_, 2) +
+                                 pow(this->w_, 2)))) / (sqrt(1 / pow(this->dx_, 2) + 1 /
+                                     pow(this->w_, 2)) * this->w_);
         }
         return result;
       }
@@ -179,11 +178,11 @@ double BasisFunc::grad(double x, int pos, bool conv) const {
         double result = 0.0;
         for(int k = -1; k <= 1; ++k) {
           result += pow(this->dx_, 2) * exp(-pow(x - this->centers_[pos - 2] +
-                    2 * k * this->L_, 2) / (2 * (pow(this->dx_, 2) +
-                    pow(this->w_, 2)))) * (this->centers_[pos - 2] -
-                    2 * k * this->L_ - x) * sqrt(1 / pow(this->dx_, 2) +
-                    1 / pow(this->w_, 2)) * this->w_ / pow(pow(this->dx_, 2) +
-                    pow(this->w_, 2), 2);
+                                                 2 * k * this->L_, 2) / (2 * (pow(this->dx_, 2) +
+                                                     pow(this->w_, 2)))) * (this->centers_[pos - 2] -
+                                                         2 * k * this->L_ - x) * sqrt(1 / pow(this->dx_, 2) +
+                                                             1 / pow(this->w_, 2)) * this->w_ / pow(pow(this->dx_, 2) +
+                                                                 pow(this->w_, 2), 2);
         }
         return result;
       }
@@ -212,10 +211,10 @@ double BasisFunc::int0(int pos) const {
       return this->dom_.second - this->dom_.first;
     } else {
       return -this->dx_ * sqrt(M_PI * 0.5) * (erf((2 * this->dom_.first -
-             this->dom_.second - this->centers_[pos - 2]) /
-             (sqrt(2) * this->dx_)) + erf((this->dom_.first -
-             2 * this->dom_.second + this->centers_[pos - 2]) /
-             (sqrt(2) * this->dx_)));
+                                              this->dom_.second - this->centers_[pos - 2]) /
+                                              (sqrt(2) * this->dx_)) + erf((this->dom_.first -
+                                                  2 * this->dom_.second + this->centers_[pos - 2]) /
+                                                  (sqrt(2) * this->dx_)));
     }
   } else {
     if(pos == 1) {
@@ -242,12 +241,12 @@ double BasisFunc::int1(int pos) const {
       double dx_sq = pow(this->dx_, 2);
       double sqrt2pi = sqrt(2 * M_PI);
       double term1 = exp(-pow(b - 2 * a + c, 2) / (2 * dx_sq)) -
-                    exp(-pow(a - 2 * b + c, 2) / (2 * dx_sq));
+                     exp(-pow(a - 2 * b + c, 2) / (2 * dx_sq));
       double term2 = (b - a + c) * sqrt2pi * erf((a - c) / (sqrt(2) * this->dx_));
       double term3 = (a - b - c) * sqrt2pi * erf((2 * a - b - c) / (sqrt(2) * this->dx_));
       double term4 = sqrt2pi * (c * erf((-a + c) / (sqrt(2) * this->dx_)) -
-                      (a - b + c) * erf((a - 2 * b + c) / (sqrt(2) * this->dx_)) +
-                      (a - b) * erf((-b + c) / (sqrt(2) * this->dx_)));
+                                (a - b + c) * erf((a - 2 * b + c) / (sqrt(2) * this->dx_)) +
+                                (a - b) * erf((-b + c) / (sqrt(2) * this->dx_)));
       return this->dx_ * 0.5 * (2 * this->dx_ * term1 + term2 + term3 + term4);
     }
   } else {
@@ -281,10 +280,10 @@ double BasisFunc::int2(int pos) const {
       double exp3 = exp(-pow(a - 2 * b + c, 2) / (2 * dx_sq));
       double exp4 = exp(-pow(b - 2 * a + c, 2) / (2 * dx_sq));
       double term1 = 2 * this->dx_ * (
-          a * (2 * exp1 + 2 * exp2 - exp3) +
-          b * (exp4 - 2 * exp1 - 2 * exp2) +
-          c * (exp4 - exp3)
-      );
+                       a * (2 * exp1 + 2 * exp2 - exp3) +
+                       b * (exp4 - 2 * exp1 - 2 * exp2) +
+                       c * (exp4 - exp3)
+                     );
       double diff1_sq = pow(b - a + c, 2) + dx_sq;
       double diff2_sq = pow(a - b + c, 2) + dx_sq;
       double c_sq = pow(c, 2) + dx_sq;
@@ -295,9 +294,9 @@ double BasisFunc::int2(int pos) const {
       double erf5 = erf((c - b) / sqrt2_dx);
       double term2 = diff1_sq * sqrt2pi * erf1 - diff1_sq * sqrt2pi * erf2;
       double term3 = sqrt2pi * (
-          c_sq * erf3 - diff2_sq * erf4 +
-          (a - b) * (a - b + 2 * c) * erf5
-      );
+                       c_sq * erf3 - diff2_sq * erf4 +
+                       (a - b) * (a - b + 2 * c) * erf5
+                     );
       return this->dx_ * 0.5 * (term1 + term2 + term3);
     }
   } else {
@@ -306,9 +305,9 @@ double BasisFunc::int2(int pos) const {
     } else if(pos % 2 == 0) {
       double k = floor(0.5 * pos);
       return 2 * sqrt(this->L_) / pow(M_PI * k, 3) *
-            (2 * pow(this->L_, 2) * M_PI * k * cos(M_PI * k) +
-            (pow(M_PI * k, 2) * (pow(this->shift_, 2) +
-            pow(this->L_, 2)) - 2 * pow(this->L_, 2)) * sin(M_PI * k));
+             (2 * pow(this->L_, 2) * M_PI * k * cos(M_PI * k) +
+              (pow(M_PI * k, 2) * (pow(this->shift_, 2) +
+                                   pow(this->L_, 2)) - 2 * pow(this->L_, 2)) * sin(M_PI * k));
     } else {
       double k = floor(0.5 * pos);
       return 4 * this->shift_ * pow(this->L_, 1.5) / pow(M_PI * k, 2) * (sin(M_PI * k) - M_PI * k * cos(M_PI * k));
diff --git a/src/ttsketch/BasisFunc.h b/src/ttsketch/BasisFunc.h
index 5231a1bec1..a7f4836c55 100644
--- a/src/ttsketch/BasisFunc.h
+++ b/src/ttsketch/BasisFunc.h
@@ -56,9 +56,15 @@ class BasisFunc {
   double operator()(double x, int pos, bool conv) const;
   // Derivative of operator()(x, pos, conv) w.r.t. x.
   double grad(double x, int pos, bool conv) const;
-  int nbasis() const { return this->nbasis_; }
-  const std::pair<double, double>& dom() const { return this->dom_; }
-  double w() const { return this->w_; }
+  int nbasis() const {
+    return this->nbasis_;
+  }
+  const std::pair<double, double>& dom() const {
+    return this->dom_;
+  }
+  double w() const {
+    return this->w_;
+  }
   // Integral of phi_pos over the domain: int_a^b phi_pos(x) dx.
   // Used to compute the partition function Z in covMat().
   double int0(int pos) const;
@@ -68,11 +74,21 @@ class BasisFunc {
   // Integral of x^2*phi_pos over the domain: int_a^b x^2*phi_pos(x) dx.
   // Used to compute E[x^2] for the marginal variance in covMat().
   double int2(int pos) const;
-  bool kernel() const { return this->kernel_; }
-  const Matrix<double>& ginv() const { return this->ginv_; }
-  const Matrix<double>& gram() const { return this->gram_; }
-  double center(int pos) const { return this->kernel_ ? this->centers_[pos - 1] : 0.0; }
-  double dx() const { return this->dx_; }
+  bool kernel() const {
+    return this->kernel_;
+  }
+  const Matrix<double>& ginv() const {
+    return this->ginv_;
+  }
+  const Matrix<double>& gram() const {
+    return this->gram_;
+  }
+  double center(int pos) const {
+    return this->kernel_ ? this->centers_[pos - 1] : 0.0;
+  }
+  double dx() const {
+    return this->dx_;
+  }
 };
 
 }
diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index d5ae778515..98a25fe622 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -71,7 +71,7 @@ class TTMetaD : public Bias {
   bool frozen_;              // true once the bias has been frozen (after sketch_until_)
   bool sketch_conv_;         // true if Gaussian smoothing (conv mode) is active for basis evaluation
   bool nonintrusive_;        // if true, uses the intrusive-free variant: accumulates Bemp directly
-                             // instead of re-projecting the previous vb_ through the Gram matrix
+  // instead of re-projecting the previous vb_ through the Gram matrix
   bool deterministic_;       // if true, use a fixed RNG seed (42) in createTTCoeff() for reproducibility
   MPS B_prev_;               // accumulated tensor moment from all previous sketches (nonintrusive only)
   std::vector<ITensor> A_prev_;   // accumulated cross-gram matrices per bond (nonintrusive only)
@@ -147,8 +147,7 @@ TTMetaD::TTMetaD(const ActionOptions& ao):
   frozen_(false),
   sketch_conv_(false),
   nonintrusive_(false),
-  deterministic_(false)
-{
+  deterministic_(false) {
   bool kernel;
   parseFlag("KERNEL_BASIS", kernel);
   this->d_ = getNumberOfArguments();
@@ -806,7 +805,7 @@ void TTMetaD::paraSketch() {
     V[core_id - 1] = ITensor(links(core_id - 1));
     if(this->sketch_r_ > 0) {
       svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1],
-          {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags, "MaxDim=", this->sketch_r_});
+      {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags, "MaxDim=", this->sketch_r_});
     } else {
       svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1], {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags});
     }

From 741e13b1acc27cfe05b350cee5f8065b20c6ba21 Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Fri, 3 Apr 2026 20:27:25 -0500
Subject: [PATCH 08/26] ci script + edit runner

---
 .ci/install.itensor           | 29 +++++++++++++
 .ci/options.mk.itensor        | 78 +++++++++++++++++++++++++++++++++++
 .github/workflows/linuxWF.yml | 15 +++++++
 3 files changed, 122 insertions(+)
 create mode 100644 .ci/install.itensor
 create mode 100644 .ci/options.mk.itensor

diff --git a/.ci/install.itensor b/.ci/install.itensor
new file mode 100644
index 0000000000..9ab4e9a0c0
--- /dev/null
+++ b/.ci/install.itensor
@@ -0,0 +1,29 @@
+#! /usr/bin/env bash
+
+set -e
+set -x
+
+# Directory containing this script (i.e. .ci/)
+CI_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Install ITensor v3 dependencies
+sudo apt-get install -y libopenblas-dev libhdf5-dev
+
+ITENSOR_VERSION=3.2.0
+ITENSOR_INSTALL_DIR="$HOME/opt/itensor"
+
+cd "$(mktemp -dt plumed.XXXXXX)"
+
+wget https://github.com/ITensor/ITensor/archive/refs/tags/v${ITENSOR_VERSION}.tar.gz
+tar xzf v${ITENSOR_VERSION}.tar.gz
+cd ITensor-${ITENSOR_VERSION}
+
+# Use the CI-specific options.mk (OpenBLAS + system HDF5)
+cp "$CI_DIR/options.mk.itensor" options.mk
+
+make
+
+# Copy the built library and headers to the install directory
+mkdir -p "$ITENSOR_INSTALL_DIR"
+cp -r lib "$ITENSOR_INSTALL_DIR/"
+cp -r itensor "$ITENSOR_INSTALL_DIR/"
diff --git a/.ci/options.mk.itensor b/.ci/options.mk.itensor
new file mode 100644
index 0000000000..659180b4b8
--- /dev/null
+++ b/.ci/options.mk.itensor
@@ -0,0 +1,78 @@
+### CI-specific options.mk for ITensor v3
+### Uses OpenBLAS and system HDF5 (installed via apt on ubuntu-22.04).
+### Copy this file to the ITensor source directory as options.mk before building.
+
+## Compiler
+CCCOM=g++ -m64 -std=c++17 -fconcepts -fPIC
+
+## BLAS/LAPACK: use OpenBLAS (apt install libopenblas-dev)
+PLATFORM=lapack
+BLAS_LAPACK_LIBFLAGS=-lpthread -lopenblas
+
+## HDF5: use system installation (apt install libhdf5-dev)
+HDF5_PREFIX=/usr
+
+## OpenMP multithreading
+ITENSOR_USE_OMP=1
+
+## Optimization flags
+OPTIMIZATIONS=-O2 -DNDEBUG -Wall -Wno-unknown-pragmas
+
+## Debug flags
+DEBUGFLAGS=-DDEBUG -g -Wall -Wno-unknown-pragmas -pedantic
+
+## Do not build dynamic libraries (static linking is simpler in CI)
+ITENSOR_MAKE_DYLIB=0
+
+###
+### Remaining variables are derived; no need to modify below.
+###
+
+PREFIX=$(THIS_DIR)
+
+ITENSOR_LIBDIR=$(PREFIX)/lib
+ITENSOR_INCLUDEDIR=$(PREFIX)
+
+ITENSOR_LIBNAMES=itensor
+ITENSOR_LIBFLAGS=$(patsubst %,-l%, $(ITENSOR_LIBNAMES))
+ITENSOR_LIBFLAGS+= $(BLAS_LAPACK_LIBFLAGS)
+ITENSOR_LIBGFLAGS=$(patsubst %,-l%-g, $(ITENSOR_LIBNAMES))
+ITENSOR_LIBGFLAGS+= $(BLAS_LAPACK_LIBFLAGS)
+ITENSOR_LIBS=$(patsubst %,$(ITENSOR_LIBDIR)/lib%.a, $(ITENSOR_LIBNAMES))
+ITENSOR_GLIBS=$(patsubst %,$(ITENSOR_LIBDIR)/lib%-g.a, $(ITENSOR_LIBNAMES))
+
+ITENSOR_INCLUDEFLAGS=-I'$(ITENSOR_INCLUDEDIR)' $(BLAS_LAPACK_INCLUDEFLAGS)
+
+ifdef HDF5_PREFIX
+ITENSOR_USE_HDF5 = 1
+# -DH5_USE_110_API: use HDF5 1.10-compatible API on HDF5 >= 1.12, which changed
+# H5Oget_info_by_name to a 5-argument form incompatible with ITensor's h5 utility code.
+ITENSOR_INCLUDEFLAGS += -I$(HDF5_PREFIX)/include -DITENSOR_USE_HDF5 -DH5_USE_110_API
+ITENSOR_LIBFLAGS += -L$(HDF5_PREFIX)/lib -lhdf5 -lhdf5_hl
+ITENSOR_LIBGFLAGS += -L$(HDF5_PREFIX)/lib -lhdf5 -lhdf5_hl
+endif
+
+ifndef CCCOM
+$(error Makefile variable CCCOM not defined in options.mk; please define it.)
+endif
+
+ifdef ITENSOR_USE_OMP
+ITENSOR_INCLUDEFLAGS += -DITENSOR_USE_OMP -fopenmp
+ITENSOR_LIBFLAGS += -fopenmp
+ITENSOR_LIBGFLAGS += -fopenmp
+endif
+
+CCFLAGS=-I. $(ITENSOR_INCLUDEFLAGS) $(OPTIMIZATIONS) -Wno-unused-variable
+CCGFLAGS=-I. $(ITENSOR_INCLUDEFLAGS) $(DEBUGFLAGS)
+LIBFLAGS=-L'$(ITENSOR_LIBDIR)' $(ITENSOR_LIBFLAGS)
+LIBGFLAGS=-L'$(ITENSOR_LIBDIR)' $(ITENSOR_LIBGFLAGS)
+
+## Determine shared library extension
+UNAME_S := $(shell uname -s)
+ifeq ($(UNAME_S),Darwin)
+  DYLIB_EXT ?= dylib
+  DYLIB_FLAGS ?= -dynamiclib
+else
+  DYLIB_EXT ?= so
+  DYLIB_FLAGS ?= -shared
+endif
diff --git a/.github/workflows/linuxWF.yml b/.github/workflows/linuxWF.yml
index 4ad0078532..23f0ab8b52 100644
--- a/.github/workflows/linuxWF.yml
+++ b/.github/workflows/linuxWF.yml
@@ -68,6 +68,21 @@ jobs:
 
         # the flags above enable the use of both libmetatomic and libtorch
         echo "PLUMED_CONFIG=$PLUMED_CONFIG --enable-libmetatomic --enable-libtorch" >> $GITHUB_ENV
+    - name: Install ITensor
+      if: ${{ contains( matrix.variant, '-mpi-' ) }}
+      run: |
+        .ci/install.itensor
+    - name: Configure ITensor
+      if: ${{ contains( matrix.variant, '-mpi-' ) }}
+      run: |
+        ITENSOR_DIR="$HOME/opt/itensor"
+        echo "LDFLAGS=$LDFLAGS -L${ITENSOR_DIR}/lib" >> $GITHUB_ENV
+        echo "CPPFLAGS=$CPPFLAGS -I${ITENSOR_DIR} -DITENSOR_USE_HDF5 -DITENSOR_USE_OMP -D__ITENSOR_LAPACK_WRAP_h -DH5_USE_110_API" >> $GITHUB_ENV
+        # temporary until configure.ac is updated to detect ITensor
+        echo "CXXFLAGS=-O3 -fconcepts" >> $GITHUB_ENV
+        echo "LIBS=-litensor -lopenblas -lhdf5 -lhdf5_hl" >> $GITHUB_ENV
+        # this will be uncommented after the configure.ac modifications
+        #echo "PLUMED_CONFIG=$PLUMED_CONFIG --enable-libitensor" >> $GITHUB_ENV
     - name: Install tools for setting up the manuals
       if: contains( matrix.variant, '-doc-' )
       run: |

From 5004739222a5ef5648358df9329793b8127caaa7 Mon Sep 17 00:00:00 2001
From: Nils Strand <nstrand@midway3-login3.rcc.local>
Date: Fri, 17 Apr 2026 15:01:39 -0500
Subject: [PATCH 09/26] ci permissions

---
 .ci/install.itensor | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 .ci/install.itensor

diff --git a/.ci/install.itensor b/.ci/install.itensor
old mode 100644
new mode 100755

From 3e3a52e280eed505c6e28d5b52e96581244b29ab Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Fri, 17 Apr 2026 15:35:31 -0500
Subject: [PATCH 10/26] attempted codecheck related fixes

---
 src/ttsketch/BasisFunc.cpp |  5 +++
 src/ttsketch/TTHelper.cpp  |  5 +++
 src/ttsketch/TTHelper.h    | 78 +++++++++++++++++++-------------------
 3 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/src/ttsketch/BasisFunc.cpp b/src/ttsketch/BasisFunc.cpp
index 746dbb5682..318ca6a748 100644
--- a/src/ttsketch/BasisFunc.cpp
+++ b/src/ttsketch/BasisFunc.cpp
@@ -1,3 +1,8 @@
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   Copyright (c) 2026 Nils E. Strand, Siyao Yang, Yuehaw Khoo, Aaron R. Dinner
+
+   See the COPYRIGHT file distributed with this software for license details.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
 #include "BasisFunc.h"
 
 namespace PLMD {
diff --git a/src/ttsketch/TTHelper.cpp b/src/ttsketch/TTHelper.cpp
index 62210e3794..0a6dfec51a 100644
--- a/src/ttsketch/TTHelper.cpp
+++ b/src/ttsketch/TTHelper.cpp
@@ -1,3 +1,8 @@
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   Copyright (c) 2026 Nils E. Strand, Siyao Yang, Yuehaw Khoo, Aaron R. Dinner
+
+   See the COPYRIGHT file distributed with this software for license details.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
 #include "TTHelper.h"
 
 using namespace itensor;
diff --git a/src/ttsketch/TTHelper.h b/src/ttsketch/TTHelper.h
index ecaefec3ac..76beb0897e 100644
--- a/src/ttsketch/TTHelper.h
+++ b/src/ttsketch/TTHelper.h
@@ -1,39 +1,39 @@
-#ifndef __PLUMED_ttsketch_TTHelper_h
-#define __PLUMED_ttsketch_TTHelper_h
-#include "BasisFunc.h"
-#include "tools/Matrix.h"
-#include "itensor/all.h"
-
-namespace PLMD {
-namespace ttsketch {
-
-// Write TT to HDF5 file under dataset "tt_{count-1}".
-// count=2 opens the file for writing (creating/truncating); subsequent calls append.
-void ttWrite(const std::string& filename, const itensor::MPS& tt, unsigned count);
-// Read TT from HDF5 file at dataset "tt_{count-1}".
-itensor::MPS ttRead(const std::string& filename, unsigned count);
-// Append the cumulative bias TT (vb) to HDF5 file under dataset "vb_{count-1}".
-void ttSumWrite(const std::string& filename, const itensor::MPS& tt, unsigned count);
-// Read cumulative bias TT from HDF5 file at dataset "vb_{count-1}".
-itensor::MPS ttSumRead(const std::string& filename, unsigned count);
-// Evaluate the TT bias at point `elements` by contracting each core with its
-// 1D basis vector. If conv=true, basis functions are evaluated in convolution mode.
-double ttEval(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, const std::vector<double>& elements, bool conv);
-// Gradient of ttEval w.r.t. `elements`. Computed by replacing the k-th dimension's
-// basis vector with its derivative while keeping all others unchanged (chain rule).
-std::vector<double> ttGrad(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, const std::vector<double>& elements, bool conv);
-// Compute the covariance matrix, mean vector, and partition function of the TT distribution.
-// Normalizes tt by Z = int tt(x) dx, then returns:
-//   sigma(k,l) = E[x_k * x_l] - E[x_k]*E[x_l]  (covariance matrix)
-//   mu[k]      = E[x_k]                           (marginal means)
-//   Z          = int tt(x) dx                     (partition function)
-// All expectations use int0/int1/int2 evaluated analytically via BasisFunc.
-std::tuple<Matrix<double>, std::vector<double>, double> covMat(const itensor::MPS& tt, const std::vector<BasisFunc>& basis);
-// Fill `grid` (bins x bins) with the 2D marginal density of `tt` for dimensions
-// pos1 and pos2, obtained by integrating out all other dimensions analytically.
-void marginal2d(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, int pos1, int pos2, std::vector<std::vector<double>>& grid, bool conv);
-
-}
-}
-
-#endif
+#ifndef __PLUMED_ttsketch_TTHelper_h
+#define __PLUMED_ttsketch_TTHelper_h
+#include "BasisFunc.h"
+#include "tools/Matrix.h"
+#include <itensor/all.h>
+
+namespace PLMD {
+namespace ttsketch {
+
+// Write TT to HDF5 file under dataset "tt_{count-1}".
+// count=2 opens the file for writing (creating/truncating); subsequent calls append.
+void ttWrite(const std::string& filename, const itensor::MPS& tt, unsigned count);
+// Read TT from HDF5 file at dataset "tt_{count-1}".
+itensor::MPS ttRead(const std::string& filename, unsigned count);
+// Append the cumulative bias TT (vb) to HDF5 file under dataset "vb_{count-1}".
+void ttSumWrite(const std::string& filename, const itensor::MPS& tt, unsigned count);
+// Read cumulative bias TT from HDF5 file at dataset "vb_{count-1}".
+itensor::MPS ttSumRead(const std::string& filename, unsigned count);
+// Evaluate the TT bias at point `elements` by contracting each core with its
+// 1D basis vector. If conv=true, basis functions are evaluated in convolution mode.
+double ttEval(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, const std::vector<double>& elements, bool conv);
+// Gradient of ttEval w.r.t. `elements`. Computed by replacing the k-th dimension's
+// basis vector with its derivative while keeping all others unchanged (chain rule).
+std::vector<double> ttGrad(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, const std::vector<double>& elements, bool conv);
+// Compute the covariance matrix, mean vector, and partition function of the TT distribution.
+// Normalizes tt by Z = int tt(x) dx, then returns:
+//   sigma(k,l) = E[x_k * x_l] - E[x_k]*E[x_l]  (covariance matrix)
+//   mu[k]      = E[x_k]                           (marginal means)
+//   Z          = int tt(x) dx                     (partition function)
+// All expectations use int0/int1/int2 evaluated analytically via BasisFunc.
+std::tuple<Matrix<double>, std::vector<double>, double> covMat(const itensor::MPS& tt, const std::vector<BasisFunc>& basis);
+// Fill `grid` (bins x bins) with the 2D marginal density of `tt` for dimensions
+// pos1 and pos2, obtained by integrating out all other dimensions analytically.
+void marginal2d(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, int pos1, int pos2, std::vector<std::vector<double>>& grid, bool conv);
+
+}
+}
+
+#endif

From f16830ec256558ba8f22d43c9fffe932212c9c99 Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Fri, 24 Apr 2026 15:51:57 -0500
Subject: [PATCH 11/26] corrected hdf5 header location

---
 .ci/options.mk.itensor        | 13 +++++++------
 .github/workflows/linuxWF.yml |  6 ++++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/.ci/options.mk.itensor b/.ci/options.mk.itensor
index 659180b4b8..759face84d 100644
--- a/.ci/options.mk.itensor
+++ b/.ci/options.mk.itensor
@@ -9,8 +9,9 @@ CCCOM=g++ -m64 -std=c++17 -fconcepts -fPIC
 PLATFORM=lapack
 BLAS_LAPACK_LIBFLAGS=-lpthread -lopenblas
 
-## HDF5: use system installation (apt install libhdf5-dev)
-HDF5_PREFIX=/usr
+## HDF5
+HDF5_CFLAGS  := $(shell pkg-config --cflags hdf5-serial 2>/dev/null || pkg-config --cflags hdf5 2>/dev/null)
+HDF5_LDFLAGS := $(shell pkg-config --libs-only-L hdf5-serial 2>/dev/null || pkg-config --libs-only-L hdf5 2>/dev/null)
 
 ## OpenMP multithreading
 ITENSOR_USE_OMP=1
@@ -43,13 +44,13 @@ ITENSOR_GLIBS=$(patsubst %,$(ITENSOR_LIBDIR)/lib%-g.a, $(ITENSOR_LIBNAMES))
 
 ITENSOR_INCLUDEFLAGS=-I'$(ITENSOR_INCLUDEDIR)' $(BLAS_LAPACK_INCLUDEFLAGS)
 
-ifdef HDF5_PREFIX
+ifneq ($(strip $(HDF5_CFLAGS)),)
 ITENSOR_USE_HDF5 = 1
 # -DH5_USE_110_API: use HDF5 1.10-compatible API on HDF5 >= 1.12, which changed
 # H5Oget_info_by_name to a 5-argument form incompatible with ITensor's h5 utility code.
-ITENSOR_INCLUDEFLAGS += -I$(HDF5_PREFIX)/include -DITENSOR_USE_HDF5 -DH5_USE_110_API
-ITENSOR_LIBFLAGS += -L$(HDF5_PREFIX)/lib -lhdf5 -lhdf5_hl
-ITENSOR_LIBGFLAGS += -L$(HDF5_PREFIX)/lib -lhdf5 -lhdf5_hl
+ITENSOR_INCLUDEFLAGS += $(HDF5_CFLAGS) -DITENSOR_USE_HDF5 -DH5_USE_110_API
+ITENSOR_LIBFLAGS += $(HDF5_LDFLAGS) -lhdf5 -lhdf5_hl
+ITENSOR_LIBGFLAGS += $(HDF5_LDFLAGS) -lhdf5 -lhdf5_hl
 endif
 
 ifndef CCCOM
diff --git a/.github/workflows/linuxWF.yml b/.github/workflows/linuxWF.yml
index 23f0ab8b52..62dfbd0282 100644
--- a/.github/workflows/linuxWF.yml
+++ b/.github/workflows/linuxWF.yml
@@ -76,8 +76,10 @@ jobs:
       if: ${{ contains( matrix.variant, '-mpi-' ) }}
       run: |
         ITENSOR_DIR="$HOME/opt/itensor"
-        echo "LDFLAGS=$LDFLAGS -L${ITENSOR_DIR}/lib" >> $GITHUB_ENV
-        echo "CPPFLAGS=$CPPFLAGS -I${ITENSOR_DIR} -DITENSOR_USE_HDF5 -DITENSOR_USE_OMP -D__ITENSOR_LAPACK_WRAP_h -DH5_USE_110_API" >> $GITHUB_ENV
+        HDF5_CFLAGS=$(pkg-config --cflags hdf5-serial 2>/dev/null || pkg-config --cflags hdf5 2>/dev/null || echo "-I/usr/include/hdf5/serial")
+        HDF5_LIBDIR=$(pkg-config --variable=libdir hdf5-serial 2>/dev/null || pkg-config --variable=libdir hdf5 2>/dev/null || echo "/usr/lib/x86_64-linux-gnu/hdf5/serial")
+        echo "LDFLAGS=$LDFLAGS -L${ITENSOR_DIR}/lib -L${HDF5_LIBDIR}" >> $GITHUB_ENV
+        echo "CPPFLAGS=$CPPFLAGS -I${ITENSOR_DIR} ${HDF5_CFLAGS} -DITENSOR_USE_HDF5 -DITENSOR_USE_OMP -D__ITENSOR_LAPACK_WRAP_h -DH5_USE_110_API" >> $GITHUB_ENV
         # temporary until configure.ac is updated to detect ITensor
         echo "CXXFLAGS=-O3 -fconcepts" >> $GITHUB_ENV
         echo "LIBS=-litensor -lopenblas -lhdf5 -lhdf5_hl" >> $GITHUB_ENV

From 015aefce9202bf68f2d9bc9163e8ae6401e7c05b Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Tue, 28 Apr 2026 14:18:19 -0500
Subject: [PATCH 12/26] minor

---
 src/ttsketch/TTMetaD.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index 98a25fe622..adaae02e5a 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -25,7 +25,6 @@ class TTMetaD : public Bias {
 
 private:
   struct Gaussian {
-    bool multivariate;
     double height;
     std::vector<double> center;
     std::vector<double> sigma;

From 07b55d9f91f3353949d9a65d45446a4aad70c910 Mon Sep 17 00:00:00 2001
From: strandn <nstrand@uchicago.edu>
Date: Tue, 28 Apr 2026 14:20:30 -0500
Subject: [PATCH 13/26] linuxWF.yml: attempt -L hdf5 fix

Co-authored-by: Daniele <Iximiel@users.noreply.github.com>
---
 .github/workflows/linuxWF.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/linuxWF.yml b/.github/workflows/linuxWF.yml
index 38bf23affd..a495d5ec88 100644
--- a/.github/workflows/linuxWF.yml
+++ b/.github/workflows/linuxWF.yml
@@ -77,8 +77,8 @@ jobs:
       run: |
         ITENSOR_DIR="$HOME/opt/itensor"
         HDF5_CFLAGS=$(pkg-config --cflags hdf5-serial 2>/dev/null || pkg-config --cflags hdf5 2>/dev/null || echo "-I/usr/include/hdf5/serial")
-        HDF5_LIBDIR=$(pkg-config --variable=libdir hdf5-serial 2>/dev/null || pkg-config --variable=libdir hdf5 2>/dev/null || echo "/usr/lib/x86_64-linux-gnu/hdf5/serial")
-        echo "LDFLAGS=$LDFLAGS -L${ITENSOR_DIR}/lib -L${HDF5_LIBDIR}" >> $GITHUB_ENV
+        HDF5_LIBDIR=$(pkg-config hdf5-serial --libs-only-L 2>/dev/null || pkg-config hdf5 --libs-only-L 2>/dev/null || echo "-L/usr/lib/x86_64-linux-gnu/hdf5/serial")
+        echo "LDFLAGS=$LDFLAGS -L${ITENSOR_DIR}/lib ${HDF5_LIBDIR}" >> $GITHUB_ENV
         echo "CPPFLAGS=$CPPFLAGS -I${ITENSOR_DIR} ${HDF5_CFLAGS} -DITENSOR_USE_HDF5 -DITENSOR_USE_OMP -D__ITENSOR_LAPACK_WRAP_h -DH5_USE_110_API" >> $GITHUB_ENV
         # temporary until configure.ac is updated to detect ITensor
         echo "CXXFLAGS=-O3 -fconcepts" >> $GITHUB_ENV

From 7dce41eedab7c64be59f1a5802edc60f06ad1964 Mon Sep 17 00:00:00 2001
From: strandn <nstrand@uchicago.edu>
Date: Tue, 5 May 2026 13:53:46 -0500
Subject: [PATCH 14/26] fix ARG issue

Co-authored-by: Daniele <Iximiel@users.noreply.github.com>
---
 src/ttsketch/TTMetaD.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index adaae02e5a..797ed22981 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -100,7 +100,6 @@ PLUMED_REGISTER_ACTION(TTMetaD, "TTMETAD")
 
 void TTMetaD::registerKeywords(Keywords& keys) {
   Bias::registerKeywords(keys);
-  keys.use("ARG");
   keys.addFlag("KERNEL_BASIS", false, "Specifies that local kernel basis should be used instead of Fourier basis");
   keys.add("compulsory", "SIGMA", "the widths of the Gaussian hills");
   keys.add("compulsory", "PACE", "the frequency for hill addition");

From 231a97cf721f0bd9b81270ca0d3aa23adcfebb5c Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Thu, 7 May 2026 19:07:08 -0500
Subject: [PATCH 15/26] replace vector<vector<>> with Matrix<>

---
 src/ttsketch/TTHelper.cpp | 12 ++++++------
 src/ttsketch/TTHelper.h   |  2 +-
 src/ttsketch/TTMetaD.cpp  | 10 ++++++----
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/ttsketch/TTHelper.cpp b/src/ttsketch/TTHelper.cpp
index 0a6dfec51a..6016b22a01 100644
--- a/src/ttsketch/TTHelper.cpp
+++ b/src/ttsketch/TTHelper.cpp
@@ -104,7 +104,7 @@ std::tuple<Matrix<double>, std::vector<double>, double> covMat(const MPS& tt, co
   rho /= elt(Z);
   // ei[k]  = E[x_k],  eii[k] = E[x_k^2],  eij[k][l] = E[x_k * x_l] for k < l
   std::vector<double> ei(d), eii(d);
-  std::vector<std::vector<double>> eij(d, std::vector<double>(d));
+  Matrix<double> eij(d, d);
   for(int k = 1; k <= d; ++k) {
     // replace dimension k's integral vector with int1 (resp. int2) to get E[x_k] (resp. E[x_k^2])
     auto eival = rho(1) * (k == 1 ? basis_int1[0] : basis_int0[0]);
@@ -121,14 +121,14 @@ std::tuple<Matrix<double>, std::vector<double>, double> covMat(const MPS& tt, co
       for(int i = 2; i <= d; ++i) {
         eijval *= rho(i) * (k == i || l == i ? basis_int1[i - 1] : basis_int0[i - 1]);
       }
-      eij[k - 1][l - 1] = elt(eijval);
+      eij(k - 1, l - 1) = elt(eijval);
     }
   }
   // sigma(k,l) = Cov(x_k, x_l) = E[x_k*x_l] - E[x_k]*E[x_l]
   Matrix<double> sigma(d, d);
   for(int k = 1; k <= d; ++k) {
     for(int l = k; l <= d; ++l) {
-      sigma(k - 1, l - 1) = sigma(l - 1, k - 1) = k == l ? eii[k - 1] - pow(ei[k - 1], 2) : eij[k - 1][l - 1] - ei[k - 1] * ei[l - 1];
+      sigma(k - 1, l - 1) = sigma(l - 1, k - 1) = k == l ? eii[k - 1] - pow(ei[k - 1], 2) : eij(k - 1, l - 1) - ei[k - 1] * ei[l - 1];
     }
   }
   return std::make_tuple(sigma, ei, elt(Z));
@@ -137,8 +137,8 @@ std::tuple<Matrix<double>, std::vector<double>, double> covMat(const MPS& tt, co
 // Compute the 2D marginal density of the normalized TT distribution for dimensions
 // pos1 and pos2 on a (bins x bins) grid. All other dimensions are integrated out
 // analytically using int0, which collapses those TT cores to scalar factors.
-void marginal2d(const MPS& tt, const std::vector<BasisFunc>& basis, int pos1, int pos2, std::vector<std::vector<double>>& grid, bool conv) {
-  int bins = grid.size();
+void marginal2d(const MPS& tt, const std::vector<BasisFunc>& basis, int pos1, int pos2, Matrix<double>& grid, bool conv) {
+  int bins = nrows(grid);
   int d = length(tt);
   auto s = siteInds(tt);
   std::vector<ITensor> basis_int0(d);
@@ -174,7 +174,7 @@ void marginal2d(const MPS& tt, const std::vector<BasisFunc>& basis, int pos1, in
           val *= rho(k) * basis_int0[k - 1];
         }
       }
-      grid[i][j] = elt(val);
+      grid(i, j) = elt(val);
     }
   }
 }
diff --git a/src/ttsketch/TTHelper.h b/src/ttsketch/TTHelper.h
index 76beb0897e..04ad2d6a71 100644
--- a/src/ttsketch/TTHelper.h
+++ b/src/ttsketch/TTHelper.h
@@ -31,7 +31,7 @@ std::vector<double> ttGrad(const itensor::MPS& tt, const std::vector<BasisFunc>&
 std::tuple<Matrix<double>, std::vector<double>, double> covMat(const itensor::MPS& tt, const std::vector<BasisFunc>& basis);
 // Fill `grid` (bins x bins) with the 2D marginal density of `tt` for dimensions
 // pos1 and pos2, obtained by integrating out all other dimensions analytically.
-void marginal2d(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, int pos1, int pos2, std::vector<std::vector<double>>& grid, bool conv);
+void marginal2d(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, int pos1, int pos2, Matrix<double>& grid, bool conv);
 
 }
 }
diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index 797ed22981..64eeddc5a7 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -457,10 +457,10 @@ void TTMetaD::update() {
 
       // record current bias at hill centers before update, for computing relative error after
       std::vector<double> A0(N);
-      std::vector<std::vector<double>> x(N);
+      Matrix<double> x(N, this->d_);
       for(unsigned i = 0; i < N; ++i) {
-        x[i] = this->hills_[i].center;
-        A0[i] = getBias(x[i]);
+        for(unsigned j = 0; j < this->d_; ++j) x(i, j) = this->hills_[i].center[j];
+        A0[i] = getBias(this->hills_[i].center);
       }
 
       log << "\nStarting TT-sketch...\n";
@@ -473,7 +473,9 @@ void TTMetaD::update() {
       // compute relative L2 approximation error at hill centers: ||V_new - V_old|| / ||V_old||
       std::vector<double> diff(N);
       for(unsigned i = 0; i < N; ++i) {
-        diff[i] = getBias(x[i]);
+        std::vector<double> xi(this->d_);
+        for(unsigned j = 0; j < this->d_; ++j) xi[j] = x(i, j);
+        diff[i] = getBias(xi);
       }
       std::transform(diff.begin(), diff.end(), A0.begin(), diff.begin(), std::minus<double>());
       log << "Relative l2 error = " << sqrt(norm(diff) / norm(A0)) << "\n\n";

From 72ac63c14eae7d346cc80adfd601979669dbf393 Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Thu, 7 May 2026 19:24:55 -0500
Subject: [PATCH 16/26] minor

---
 src/ttsketch/TTHelper.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ttsketch/TTHelper.cpp b/src/ttsketch/TTHelper.cpp
index 6016b22a01..a39fb55897 100644
--- a/src/ttsketch/TTHelper.cpp
+++ b/src/ttsketch/TTHelper.cpp
@@ -138,7 +138,7 @@ std::tuple<Matrix<double>, std::vector<double>, double> covMat(const MPS& tt, co
 // pos1 and pos2 on a (bins x bins) grid. All other dimensions are integrated out
 // analytically using int0, which collapses those TT cores to scalar factors.
 void marginal2d(const MPS& tt, const std::vector<BasisFunc>& basis, int pos1, int pos2, Matrix<double>& grid, bool conv) {
-  int bins = nrows(grid);
+  int bins = grid.nrows();
   int d = length(tt);
   auto s = siteInds(tt);
   std::vector<ITensor> basis_int0(d);

From 0eba902d2b564323afacec31fb4b3192078c6536 Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Fri, 8 May 2026 16:00:04 -0500
Subject: [PATCH 17/26] attempt configure.ac + exclude debug when ITensor is
 used

---
 .github/workflows/linuxWF.yml | 12 ++++++------
 configure.ac                  | 34 ++++++++++++++++++++++++++++++++++
 src/ttsketch/TTHelper.cpp     |  2 ++
 src/ttsketch/TTHelper.h       |  3 ++-
 src/ttsketch/TTMetaD.cpp      |  2 ++
 5 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/linuxWF.yml b/.github/workflows/linuxWF.yml
index a495d5ec88..05ca69a920 100644
--- a/.github/workflows/linuxWF.yml
+++ b/.github/workflows/linuxWF.yml
@@ -73,18 +73,18 @@ jobs:
       run: |
         .ci/install.itensor
     - name: Configure ITensor
-      if: ${{ contains( matrix.variant, '-mpi-' ) }}
+      # exclude debug builds: _GLIBCXX_DEBUG changes the STL ABI and is incompatible
+      # with libitensor.a (built without that flag), causing compilation errors in
+      # ITensor headers and runtime corruption. The #ifdef __PLUMED_HAS_LIBITENSOR
+      # guards in ttsketch ensure safe compilation when ITensor is not configured.
+      if: ${{ contains( matrix.variant, '-mpi-' ) && ! contains( matrix.variant, '-debug-' ) }}
       run: |
         ITENSOR_DIR="$HOME/opt/itensor"
         HDF5_CFLAGS=$(pkg-config --cflags hdf5-serial 2>/dev/null || pkg-config --cflags hdf5 2>/dev/null || echo "-I/usr/include/hdf5/serial")
         HDF5_LIBDIR=$(pkg-config hdf5-serial --libs-only-L 2>/dev/null || pkg-config hdf5 --libs-only-L 2>/dev/null || echo "-L/usr/lib/x86_64-linux-gnu/hdf5/serial")
         echo "LDFLAGS=$LDFLAGS -L${ITENSOR_DIR}/lib ${HDF5_LIBDIR}" >> $GITHUB_ENV
         echo "CPPFLAGS=$CPPFLAGS -I${ITENSOR_DIR} ${HDF5_CFLAGS} -DITENSOR_USE_HDF5 -DITENSOR_USE_OMP -D__ITENSOR_LAPACK_WRAP_h -DH5_USE_110_API" >> $GITHUB_ENV
-        # temporary until configure.ac is updated to detect ITensor
-        echo "CXXFLAGS=-O3 -fconcepts" >> $GITHUB_ENV
-        echo "LIBS=-litensor -lopenblas -lhdf5 -lhdf5_hl" >> $GITHUB_ENV
-        # this will be uncommented after the configure.ac modifications
-        #echo "PLUMED_CONFIG=$PLUMED_CONFIG --enable-libitensor" >> $GITHUB_ENV
+        echo "PLUMED_CONFIG=$PLUMED_CONFIG --enable-libitensor" >> $GITHUB_ENV
     - name: Install tools for setting up the manuals
       if: contains( matrix.variant, '-doc-' )
       run: |
diff --git a/configure.ac b/configure.ac
index 2889ec641f..18cbda2525 100644
--- a/configure.ac
+++ b/configure.ac
@@ -374,6 +374,8 @@ PLUMED_CONFIG_ENABLE([af_cuda],[search for arrayfire_cuda],[no])
 PLUMED_CONFIG_ENABLE([af_cpu],[search for arrayfire_cpu],[no])
 PLUMED_CONFIG_ENABLE([libtorch],[search for libtorch],[no]) #added by luigibonati
 PLUMED_CONFIG_ENABLE([libmetatomic],[search for libmetatomic],[no])
+PLUMED_CONFIG_ENABLE([libitensor],[search for libitensor],[no])
+PLUMED_CONFIG_ENABLE([libhdf5],[search for libhdf5],[no])
 PLUMED_CONFIG_ENABLE([linuxperf],[activate the compile options for a more accurate report with perf on linux],[no])
 PLUMED_CONFIG_ENABLE([openacc],[search for openacc (use PLUMED_ACC_TYPE and PLUMED_ACC_GPU for controlling the settings)],[no])
 
@@ -1031,6 +1033,38 @@ if test $libmetatomic = true ; then
   ], [__PLUMED_HAS_LIBMETATOMIC], [metatomic_torch metatensor_torch metatensor], [true])
 fi
 
+# libitensor requires libhdf5
+if test $libitensor = true ; then
+  libhdf5=true;
+fi
+
+if test $libhdf5 = true ; then
+  PLUMED_CHECK_CXX_PACKAGE([libhdf5],[
+    #include <hdf5.h>
+    int main() {
+      unsigned majnum, minnum, relnum;
+      H5get_libversion(&majnum, &minnum, &relnum);
+      return 0;
+    }
+  ], [__PLUMED_HAS_LIBHDF5], [ hdf5_hl hdf5 ], [true])
+fi
+
+if test $libitensor = true ; then
+  # ITensor headers require C++20 concepts; add -std=c++20 for the detection check only
+  save_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -std=c++20"
+  PLUMED_CHECK_CXX_PACKAGE([libitensor],[
+    #include <itensor/all.h>
+    int main() {
+      auto i = itensor::Index(2,"i");
+      auto j = itensor::Index(2,"j");
+      auto T = itensor::ITensor(i,j);
+      return 0;
+    }
+  ], [__PLUMED_HAS_LIBITENSOR], [ itensor openblas ], [true])
+  CXXFLAGS="$save_CXXFLAGS"
+fi
+
 acc_found=ko
 if test $openacc = true; then
   if test -z "$PLUMED_ACC_TYPE"; then
diff --git a/src/ttsketch/TTHelper.cpp b/src/ttsketch/TTHelper.cpp
index a39fb55897..5f0fd5151a 100644
--- a/src/ttsketch/TTHelper.cpp
+++ b/src/ttsketch/TTHelper.cpp
@@ -4,6 +4,7 @@
    See the COPYRIGHT file distributed with this software for license details.
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
 #include "TTHelper.h"
+#ifdef __PLUMED_HAS_LIBITENSOR
 
 using namespace itensor;
 
@@ -181,3 +182,4 @@ void marginal2d(const MPS& tt, const std::vector<BasisFunc>& basis, int pos1, in
 
 }
 }
+#endif // __PLUMED_HAS_LIBITENSOR
diff --git a/src/ttsketch/TTHelper.h b/src/ttsketch/TTHelper.h
index 04ad2d6a71..9c84267e28 100644
--- a/src/ttsketch/TTHelper.h
+++ b/src/ttsketch/TTHelper.h
@@ -2,6 +2,7 @@
 #define __PLUMED_ttsketch_TTHelper_h
 #include "BasisFunc.h"
 #include "tools/Matrix.h"
+#ifdef __PLUMED_HAS_LIBITENSOR
 #include <itensor/all.h>
 
 namespace PLMD {
@@ -35,5 +36,5 @@ void marginal2d(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, int
 
 }
 }
-
+#endif // __PLUMED_HAS_LIBITENSOR
 #endif
diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index 64eeddc5a7..f933cd5352 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -6,6 +6,7 @@
  */
 
 #include "TTHelper.h"
+#ifdef __PLUMED_HAS_LIBITENSOR
 #include "bias/Bias.h"
 #include "core/ActionRegister.h"
 #include "core/ActionSet.h"
@@ -1085,3 +1086,4 @@ std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorM
 
 }
 }
+#endif // __PLUMED_HAS_LIBITENSOR

From 9590fb9067a5e3da6f71e0f91a74db639f546d80 Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Fri, 8 May 2026 17:13:02 -0500
Subject: [PATCH 18/26] missing openmp flag I think

---
 configure    | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++
 configure.ac |   5 +-
 2 files changed, 303 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index fbf3067928..eeb5610944 100755
--- a/configure
+++ b/configure
@@ -749,6 +749,8 @@ enable_af_cuda
 enable_af_cpu
 enable_libtorch
 enable_libmetatomic
+enable_libitensor
+enable_libhdf5
 enable_linuxperf
 enable_openacc
 enable_openmp
@@ -1455,6 +1457,8 @@ Optional Features:
   --enable-af_cpu         enable search for arrayfire_cpu, default: no
   --enable-libtorch       enable search for libtorch, default: no
   --enable-libmetatomic   enable search for libmetatomic, default: no
+  --enable-libitensor     enable search for libitensor, default: no
+  --enable-libhdf5        enable search for libhdf5, default: no
   --enable-linuxperf      enable activate the compile options for a more
                           accurate report with perf on linux, default: no
   --enable-openacc        enable search for openacc (use PLUMED_ACC_TYPE and
@@ -3240,6 +3244,42 @@ fi
 
 
 
+libitensor=
+# Check whether --enable-libitensor was given.
+if test "${enable_libitensor+set}" = set; then :
+  enableval=$enable_libitensor; case "${enableval}" in
+             (yes) libitensor=true ;;
+             (no)  libitensor=false ;;
+             (*)   as_fn_error $? "wrong argument to --enable-libitensor" "$LINENO" 5 ;;
+  esac
+else
+  case "no" in
+             (yes) libitensor=true ;;
+             (no)  libitensor=false ;;
+  esac
+
+fi
+
+
+
+libhdf5=
+# Check whether --enable-libhdf5 was given.
+if test "${enable_libhdf5+set}" = set; then :
+  enableval=$enable_libhdf5; case "${enableval}" in
+             (yes) libhdf5=true ;;
+             (no)  libhdf5=false ;;
+             (*)   as_fn_error $? "wrong argument to --enable-libhdf5" "$LINENO" 5 ;;
+  esac
+else
+  case "no" in
+             (yes) libhdf5=true ;;
+             (no)  libhdf5=false ;;
+  esac
+
+fi
+
+
+
 linuxperf=
 # Check whether --enable-linuxperf was given.
 if test "${enable_linuxperf+set}" = set; then :
@@ -10335,6 +10375,266 @@ $as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_LIBMETATOMIC" >&2;}
 
 fi
 
+# libitensor requires libhdf5
+if test $libitensor = true ; then
+  libhdf5=true;
+fi
+
+if test $libhdf5 = true ; then
+
+    found=ko
+    __PLUMED_HAS_LIBHDF5=no
+    if test "${libsearch}" = true ; then
+      testlibs=" hdf5_hl hdf5 "
+    else
+      testlibs=""
+    fi
+    save_LIBS="$LIBS"
+
+    # check if multiple libraries are required simultaneously
+    multiple="no"
+    if test "true" = "true"; then
+      multiple="yes"
+      all_LIBS=""
+      for testlib in $testlibs;
+      do
+        all_LIBS="$all_LIBS -l$testlib"
+      done
+      testlibs=" " # to check only without libraries, and later with all together
+    fi
+
+    # check without libraries
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking libhdf5 without extra libs" >&5
+$as_echo_n "checking libhdf5 without extra libs... " >&6; }
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+    #include <hdf5.h>
+    int main() {
+      unsigned majnum, minnum, relnum;
+      H5get_libversion(&majnum, &minnum, &relnum);
+      return 0;
+    }
+
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  found=ok
+          { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+
+    if test "$found" = "ko" ; then
+      if test "$multiple" = "yes" ; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking libhdf5 with $all_LIBS" >&5
+$as_echo_n "checking libhdf5 with $all_LIBS... " >&6; }
+        LIBS="$all_LIBS $LIBS"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+    #include <hdf5.h>
+    int main() {
+      unsigned majnum, minnum, relnum;
+      H5get_libversion(&majnum, &minnum, &relnum);
+      return 0;
+    }
+
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  found=ok
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+      else
+        for testlib in $testlibs
+        do
+          { $as_echo "$as_me:${as_lineno-$LINENO}: checking libhdf5 with -l$testlib" >&5
+$as_echo_n "checking libhdf5 with -l$testlib... " >&6; }
+          LIBS="-l$testlib $LIBS"
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+    #include <hdf5.h>
+    int main() {
+      unsigned majnum, minnum, relnum;
+      H5get_libversion(&majnum, &minnum, &relnum);
+      return 0;
+    }
+
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  found=ok
+                { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+          if test $found = ok ; then
+            break
+          fi
+          LIBS="$save_LIBS"
+        done
+      fi
+    fi
+
+    if test $found = ok ; then
+      $as_echo "#define __PLUMED_HAS_LIBHDF5 1" >>confdefs.h
+
+      __PLUMED_HAS_LIBHDF5=yes
+    else
+      { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot enable __PLUMED_HAS_LIBHDF5" >&5
+$as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_LIBHDF5" >&2;}
+      LIBS="$save_LIBS"
+    fi
+
+fi
+
+if test $libitensor = true ; then
+  # ITensor headers require C++20 concepts; add -std=c++20 for the detection check only
+  save_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -std=c++20"
+
+    found=ko
+    __PLUMED_HAS_LIBITENSOR=no
+    if test "${libsearch}" = true ; then
+      testlibs=" itensor openblas "
+    else
+      testlibs=""
+    fi
+    save_LIBS="$LIBS"
+
+    # check if multiple libraries are required simultaneously
+    multiple="no"
+    if test "true" = "true"; then
+      multiple="yes"
+      all_LIBS=""
+      for testlib in $testlibs;
+      do
+        all_LIBS="$all_LIBS -l$testlib"
+      done
+      testlibs=" " # to check only without libraries, and later with all together
+    fi
+
+    # check without libraries
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking libitensor without extra libs" >&5
+$as_echo_n "checking libitensor without extra libs... " >&6; }
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+    #include <itensor/all.h>
+    int main() {
+      auto i = itensor::Index(2,"i");
+      auto j = itensor::Index(2,"j");
+      auto T = itensor::ITensor(i,j);
+      return 0;
+    }
+
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  found=ok
+          { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+
+    if test "$found" = "ko" ; then
+      if test "$multiple" = "yes" ; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking libitensor with $all_LIBS" >&5
+$as_echo_n "checking libitensor with $all_LIBS... " >&6; }
+        LIBS="$all_LIBS $LIBS"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+    #include <itensor/all.h>
+    int main() {
+      auto i = itensor::Index(2,"i");
+      auto j = itensor::Index(2,"j");
+      auto T = itensor::ITensor(i,j);
+      return 0;
+    }
+
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  found=ok
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+      else
+        for testlib in $testlibs
+        do
+          { $as_echo "$as_me:${as_lineno-$LINENO}: checking libitensor with -l$testlib" >&5
+$as_echo_n "checking libitensor with -l$testlib... " >&6; }
+          LIBS="-l$testlib $LIBS"
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+    #include <itensor/all.h>
+    int main() {
+      auto i = itensor::Index(2,"i");
+      auto j = itensor::Index(2,"j");
+      auto T = itensor::ITensor(i,j);
+      return 0;
+    }
+
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  found=ok
+                { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+          if test $found = ok ; then
+            break
+          fi
+          LIBS="$save_LIBS"
+        done
+      fi
+    fi
+
+    if test $found = ok ; then
+      $as_echo "#define __PLUMED_HAS_LIBITENSOR 1" >>confdefs.h
+
+      __PLUMED_HAS_LIBITENSOR=yes
+    else
+      { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot enable __PLUMED_HAS_LIBITENSOR" >&5
+$as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_LIBITENSOR" >&2;}
+      LIBS="$save_LIBS"
+    fi
+
+  CXXFLAGS="$save_CXXFLAGS"
+fi
+
 acc_found=ko
 if test $openacc = true; then
   if test -z "$PLUMED_ACC_TYPE"; then
diff --git a/configure.ac b/configure.ac
index 18cbda2525..9728f44fcb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1050,9 +1050,10 @@ if test $libhdf5 = true ; then
 fi
 
 if test $libitensor = true ; then
-  # ITensor headers require C++20 concepts; add -std=c++20 for the detection check only
+  # ITensor headers require C++20 concepts; libitensor.a has OpenMP-outlined functions
+  # that need -fopenmp at link time. Add both flags for the detection check only.
   save_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -std=c++20"
+  CXXFLAGS="$CXXFLAGS -std=c++20 -fopenmp"
   PLUMED_CHECK_CXX_PACKAGE([libitensor],[
     #include <itensor/all.h>
     int main() {

From 4cb33b172c8eb9541cabbb69f1c7b7986b17b86a Mon Sep 17 00:00:00 2001
From: Nils Strand <nstrand@midway3-login3.rcc.local>
Date: Fri, 8 May 2026 18:06:56 -0500
Subject: [PATCH 19/26] updated configure file

---
 configure | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index eeb5610944..1f54f1fc16 100755
--- a/configure
+++ b/configure
@@ -10505,9 +10505,10 @@ $as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_LIBHDF5" >&2;}
 fi
 
 if test $libitensor = true ; then
-  # ITensor headers require C++20 concepts; add -std=c++20 for the detection check only
+  # ITensor headers require C++20 concepts; libitensor.a has OpenMP-outlined functions
+  # that need -fopenmp at link time. Add both flags for the detection check only.
   save_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -std=c++20"
+  CXXFLAGS="$CXXFLAGS -std=c++20 -fopenmp"
 
     found=ko
     __PLUMED_HAS_LIBITENSOR=no

From f88dcfdf9d3bb4cf85773965ac871383a7385ad3 Mon Sep 17 00:00:00 2001
From: Nils Strand <nstrand@midway3-login3.rcc.local>
Date: Sun, 10 May 2026 19:37:43 -0500
Subject: [PATCH 20/26] several fixes

---
 configure                          |   53 +-
 configure.ac                       |    8 +-
 regtest/ttsketch/rt-ttmetad/config |    1 +
 src/ttsketch/TTHelper.cpp          |    4 +-
 src/ttsketch/TTHelper.h            |    4 +-
 src/ttsketch/TTMetaD.cpp           | 2182 ++++++++++++++--------------
 src/ttsketch/module.md             |    2 +-
 7 files changed, 1154 insertions(+), 1100 deletions(-)

diff --git a/configure b/configure
index 1f54f1fc16..da78d1145a 100755
--- a/configure
+++ b/configure
@@ -10505,10 +10505,57 @@ $as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_LIBHDF5" >&2;}
 fi
 
 if test $libitensor = true ; then
-  # ITensor headers require C++20 concepts; libitensor.a has OpenMP-outlined functions
-  # that need -fopenmp at link time. Add both flags for the detection check only.
+  # NOTE: ITensor headers use C++20 'requires' expressions. -fconcepts enables them on
+  # GCC without forcing -std=c++20 globally. When PLUMED upgrades to -std=c++20
+  # everywhere, this PLUMED_CHECK_CXXFLAG call can be removed.
+
+  save_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -fconcepts"
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -fconcepts" >&5
+$as_echo_n "checking whether $CXX accepts -fconcepts... " >&6; }
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: not linking" >&5
+$as_echo "not linking" >&6; }; CXXFLAGS="$save_CXXFLAGS"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }; CXXFLAGS="$save_CXXFLAGS"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
   save_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -std=c++20 -fopenmp"
+  CXXFLAGS="$CXXFLAGS -fopenmp"
 
     found=ko
     __PLUMED_HAS_LIBITENSOR=no
diff --git a/configure.ac b/configure.ac
index 9728f44fcb..80ac930fcc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1050,10 +1050,12 @@ if test $libhdf5 = true ; then
 fi
 
 if test $libitensor = true ; then
-  # ITensor headers require C++20 concepts; libitensor.a has OpenMP-outlined functions
-  # that need -fopenmp at link time. Add both flags for the detection check only.
+  # NOTE: ITensor headers use C++20 'requires' expressions. -fconcepts enables them on
+  # GCC without forcing -std=c++20 globally. When PLUMED upgrades to -std=c++20
+  # everywhere, this PLUMED_CHECK_CXXFLAG call can be removed.
+  PLUMED_CHECK_CXXFLAG([-fconcepts])
   save_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -std=c++20 -fopenmp"
+  CXXFLAGS="$CXXFLAGS -fopenmp"
   PLUMED_CHECK_CXX_PACKAGE([libitensor],[
     #include <itensor/all.h>
     int main() {
diff --git a/regtest/ttsketch/rt-ttmetad/config b/regtest/ttsketch/rt-ttmetad/config
index a7c12c885f..21795a29b4 100644
--- a/regtest/ttsketch/rt-ttmetad/config
+++ b/regtest/ttsketch/rt-ttmetad/config
@@ -1,3 +1,4 @@
+plumed_needs=libitensor
 plumed_modules=ttsketch
 type=driver
 arg="--plumed plumed.dat --trajectory-stride 500 --timestep 0.002 --igro traj.gro"
diff --git a/src/ttsketch/TTHelper.cpp b/src/ttsketch/TTHelper.cpp
index 5f0fd5151a..27b335e166 100644
--- a/src/ttsketch/TTHelper.cpp
+++ b/src/ttsketch/TTHelper.cpp
@@ -4,7 +4,7 @@
    See the COPYRIGHT file distributed with this software for license details.
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
 #include "TTHelper.h"
-#ifdef __PLUMED_HAS_LIBITENSOR
+#if defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
 
 using namespace itensor;
 
@@ -182,4 +182,4 @@ void marginal2d(const MPS& tt, const std::vector<BasisFunc>& basis, int pos1, in
 
 }
 }
-#endif // __PLUMED_HAS_LIBITENSOR
+#endif // defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
diff --git a/src/ttsketch/TTHelper.h b/src/ttsketch/TTHelper.h
index 9c84267e28..7168102ed7 100644
--- a/src/ttsketch/TTHelper.h
+++ b/src/ttsketch/TTHelper.h
@@ -2,7 +2,7 @@
 #define __PLUMED_ttsketch_TTHelper_h
 #include "BasisFunc.h"
 #include "tools/Matrix.h"
-#ifdef __PLUMED_HAS_LIBITENSOR
+#if defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
 #include <itensor/all.h>
 
 namespace PLMD {
@@ -36,5 +36,5 @@ void marginal2d(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, int
 
 }
 }
-#endif // __PLUMED_HAS_LIBITENSOR
+#endif // defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
 #endif
diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index f933cd5352..4d043f249c 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -1,1089 +1,1093 @@
-/* Copyright (c) 2026, Nils E. Strand, Siyao Yang, Yuehaw Khoo, and Aaron R. Dinner
- *
- * Tensor train (TT)-metadynamics.
- * https://arxiv.org/abs/2603.13549
- * See module.md for installation instructions.
- */
-
-#include "TTHelper.h"
-#ifdef __PLUMED_HAS_LIBITENSOR
-#include "bias/Bias.h"
-#include "core/ActionRegister.h"
-#include "core/ActionSet.h"
-#include "core/PlumedMain.h"
-#include "tools/Exception.h"
-#include "tools/Communicator.h"
-#include "tools/File.h"
-#include "tools/OpenMP.h"
-
-using namespace itensor;
-using namespace PLMD::bias;
-
-namespace PLMD {
-namespace ttsketch {
-
-class TTMetaD : public Bias {
-
-private:
-  struct Gaussian {
-    double height;
-    std::vector<double> center;
-    std::vector<double> sigma;
-    std::vector<double> invsigma;
-    Gaussian(const double h, const std::vector<double>& c, const std::vector<double>& s)
-      : height(h), center(c), sigma(s), invsigma(s) {
-      for(unsigned i = 0; i < invsigma.size(); ++i) {
-        if(abs(invsigma[i]) > 1.e-20) {
-          invsigma[i] = 1.0 / invsigma[i];
-        } else {
-          invsigma[i] = 0.0;
-        }
-      }
-    }
-  };
-  // standard metadynamics parameters
-  double kbt_;
-  int stride_;
-  bool welltemp_;
-  double biasf_;
-  std::string fmt_;
-  bool isFirstStep_;
-  double height0_;
-  std::vector<double> sigma0_;
-  std::vector<Gaussian> hills_;   // Gaussians deposited since the last TT update
-  OFile hillsOfile_;
-  std::string mw_dir_;
-  std::string hillsfname_;
-  bool walkers_mpi_;
-  int mpi_size_;
-  int mpi_rank_;
-  // TT-sketch parameters
-  unsigned d_;               // number of CV dimensions
-  int sketch_rc_;            // initial (sketch) rank for the random TT coefficient tensor
-  int sketch_r_;             // target TT bond dimension after SVD truncation (0 = use cutoff)
-  double sketch_cutoff_;     // relative SVD truncation threshold (used when sketch_r_=0)
-  int sketch_stride_;        // steps between TT bias updates (tau in the paper)
-  double sketch_alpha_;      // weight for non-constant basis functions in the sketch coefficient TT
-  std::vector<BasisFunc> sketch_basis_;  // one BasisFunc per CV dimension
-  unsigned sketch_count_;    // number of TT updates performed so far (starts at 1, increments after each update)
-  MPS vb_;                   // current TT approximation of the accumulated bias (V_bias^TT)
-  double sketch_until_;      // simulation time after which the bias is frozen (no further updates)
-  bool frozen_;              // true once the bias has been frozen (after sketch_until_)
-  bool sketch_conv_;         // true if Gaussian smoothing (conv mode) is active for basis evaluation
-  bool nonintrusive_;        // if true, uses the intrusive-free variant: accumulates Bemp directly
-  // instead of re-projecting the previous vb_ through the Gram matrix
-  bool deterministic_;       // if true, use a fixed RNG seed (42) in createTTCoeff() for reproducibility
-  MPS B_prev_;               // accumulated tensor moment from all previous sketches (nonintrusive only)
-  std::vector<ITensor> A_prev_;   // accumulated cross-gram matrices per bond (nonintrusive only)
-
-  void readGaussians(IFile *ifile);
-  void writeGaussian(const Gaussian& hill, OFile& file);
-  double getHeight(const std::vector<double>& cv);
-  double getBias(const std::vector<double>& cv);
-  double getBiasAndDerivatives(const std::vector<double>& cv, std::vector<double>& der);
-  double evaluateGaussian(const std::vector<double>& cv, const Gaussian& hill);
-  double evaluateGaussianAndDerivatives(const std::vector<double>& cv, const Gaussian& hill, std::vector<double>& der, std::vector<double>& dp);
-  bool scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height);
-  void paraSketch();
-  MPS createTTCoeff() const;
-  std::pair<std::vector<ITensor>, IndexSet> intBasisSample(const IndexSet& is) const;
-  std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> formTensorMoment(const std::vector<ITensor>& M, const MPS& coeff, const IndexSet& is);
-  std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> formTensorMomentVb(const MPS& coeff);
-
-public:
-  explicit TTMetaD(const ActionOptions&);
-  void calculate() override;
-  void update() override;
-  static void registerKeywords(Keywords& keys);
-};
-
-PLUMED_REGISTER_ACTION(TTMetaD, "TTMETAD")
-
-void TTMetaD::registerKeywords(Keywords& keys) {
-  Bias::registerKeywords(keys);
-  keys.addFlag("KERNEL_BASIS", false, "Specifies that local kernel basis should be used instead of Fourier basis");
-  keys.add("compulsory", "SIGMA", "the widths of the Gaussian hills");
-  keys.add("compulsory", "PACE", "the frequency for hill addition");
-  keys.add("compulsory", "FILE", "HILLS", "a file in which the list of added hills is stored");
-  keys.add("compulsory", "HEIGHT", "the heights of the Gaussian hills");
-  keys.add("optional", "FMT", "specify format for HILLS files (useful for decrease the number of digits in regtests)");
-  keys.add("optional", "BIASFACTOR", "use well tempered metadynamics and use this bias factor. Please note you must also specify temp");
-  keys.add("optional", "TEMP", "the system temperature - this is only needed if you are doing well-tempered metadynamics");
-  keys.addFlag("WALKERS_MPI", false, "To be used when gromacs + multiple walkers are used");
-  keys.add("optional", "WALKERS_DIR", "shared directory with the hills files from all the walkers");
-  keys.use("RESTART");
-  keys.add("optional", "SKETCH_RANK", "Target rank for TTSketch algorithm - compulsory if SKETCH_CUTOFF is not specified");
-  keys.add("optional", "SKETCH_CUTOFF", "Truncation error cutoff for singular value decomposition - compulsory if SKETCH_RANK is not specified");
-  keys.add("compulsory", "SKETCH_INITRANK", "Initial rank for TTSketch algorithm");
-  keys.add("compulsory", "SKETCH_PACE", "1e6", "The frequency for TT Vbias updates");
-  keys.add("compulsory", "INTERVAL_MIN", "Lower limits, outside the limits the system will not feel the biasing force");
-  keys.add("compulsory", "INTERVAL_MAX", "Upper limits, outside the limits the system will not feel the biasing force");
-  keys.add("compulsory", "SKETCH_NBASIS", "20", "Number of basis functions per dimension");
-  keys.add("compulsory", "SKETCH_ALPHA", "0.05", "Weight coefficient for random tensor train construction");
-  keys.add("optional", "SKETCH_UNTIL", "After this time, the bias potential freezes");
-  keys.add("optional", "SKETCH_WIDTH", "Width of Gaussian kernels for smoothing");
-  keys.add("optional", "KERNEL_DX", "Width of basis function kernels");
-  keys.addFlag("NONINTRUSIVE", false, "Sketching uses previous exact sum of Gaussians instead of TT approximation");
-  keys.addFlag("DETERMINISTIC", false, "Use a fixed random seed for TT sketch construction, ensuring reproducible results. Intended for regression testing.");
-}
-
-TTMetaD::TTMetaD(const ActionOptions& ao):
-  PLUMED_BIAS_INIT(ao),
-  kbt_(0.0),
-  stride_(0),
-  welltemp_(false),
-  biasf_(-1.0),
-  isFirstStep_(true),
-  height0_(std::numeric_limits<double>::max()),
-  mw_dir_(""),
-  walkers_mpi_(false),
-  mpi_size_(0),
-  mpi_rank_(0),
-  sketch_r_(0),
-  sketch_cutoff_(0.0),
-  sketch_count_(1),
-  sketch_until_(std::numeric_limits<double>::max()),
-  frozen_(false),
-  sketch_conv_(false),
-  nonintrusive_(false),
-  deterministic_(false) {
-  bool kernel;
-  parseFlag("KERNEL_BASIS", kernel);
-  this->d_ = getNumberOfArguments();
-  if(this->d_ < 2) {
-    error("Number of arguments must be at least 2");
-  }
-  parse("FMT", this->fmt_);
-  parseVector("SIGMA", this->sigma0_);
-  if(this->sigma0_.size() != d_) {
-    error("number of arguments does not match number of SIGMA parameters");
-  }
-  parse("HEIGHT", this->height0_);
-  parse("PACE", this->stride_);
-  if(stride_ <= 0) {
-    error("frequency for hill addition is nonsensical");
-  }
-  this->hillsfname_ = "HILLS";
-  parse("FILE", this->hillsfname_);
-  parse("BIASFACTOR", this->biasf_);
-  if(this->biasf_ < 1.0 && this->biasf_ != -1.0) {
-    error("well tempered bias factor is nonsensical");
-  }
-  this->kbt_ = getkBT();
-  if(this->biasf_ >= 1.0) {
-    if(this->kbt_ == 0.0) {
-      error("Unless the MD engine passes the temperature to plumed, with well-tempered metad you must specify it using TEMP");
-    }
-    this->welltemp_ = true;
-  }
-
-  parseFlag("WALKERS_MPI", this->walkers_mpi_);
-  parse("WALKERS_DIR", this->mw_dir_);
-  if(this->walkers_mpi_ && this->mw_dir_== "") {
-    const std::string ret = std::filesystem::current_path();
-    this->mw_dir_ = ret + "/";
-    multi_sim_comm.Bcast(this->mw_dir_, 0);
-  }
-  if(this->walkers_mpi_ && this->mw_dir_ != "") {
-    this->hillsfname_ = this->mw_dir_ + "/" + this->hillsfname_;
-  }
-
-  parse("SKETCH_RANK", this->sketch_r_);
-  parse("SKETCH_CUTOFF", this->sketch_cutoff_);
-  if(this->sketch_r_ <= 0 && (this->sketch_cutoff_ <= 0.0 || this->sketch_cutoff_ > 1.0)) {
-    error("Valid SKETCH_RANK or SKETCH_CUTOFF needs to be specified");
-  }
-  parse("SKETCH_INITRANK", this->sketch_rc_);
-  if(this->sketch_rc_ <= 0) {
-    error("SKETCH_INITRANK must be positive");
-  }
-  parse("SKETCH_PACE", this->sketch_stride_);
-  if(this->sketch_stride_ <= 0) {
-    error("SKETCH_PACE must be positive");
-  }
-  std::vector<double> interval_min;
-  parseVector("INTERVAL_MIN", interval_min);
-  if(interval_min.size() != this->d_) {
-    error("Number of arguments does not match number of INTERVAL_MIN parameters");
-  }
-  std::vector<double> interval_max;
-  parseVector("INTERVAL_MAX", interval_max);
-  if(interval_max.size() != this->d_) {
-    error("Number of arguments does not match number of INTERVAL_MAX parameters");
-  }
-  int nbasis = 20;
-  parse("SKETCH_NBASIS", nbasis);
-  if(nbasis <= 1) {
-    error("SKETCH_NBASIS must be greater than 1");
-  }
-  if(!kernel && nbasis % 2 == 0) {
-    ++nbasis;
-  }
-  parse("SKETCH_ALPHA", this->sketch_alpha_);
-  if(this->sketch_alpha_ <= 0.0 || this->sketch_alpha_ > 1.0) {
-    error("SKETCH_ALPHA must be positive and no greater than 1");
-  }
-  std::vector<double> w;
-  parseVector("SKETCH_WIDTH", w);
-  if(w.size() == 0) {
-    w.assign(this->d_, 0.0);
-  }
-  if(w.size() != this->d_) {
-    error("Number of arguments does not match number of SKETCH_WIDTH parameters");
-  }
-  for (double val : w) {
-    if (val != 0.0) {
-      this->sketch_conv_ = true;
-    }
-  }
-  std::vector<double> dx;
-  parseVector("KERNEL_DX", dx);
-  if(dx.size() == 0) {
-    dx.resize(this->d_, 0.0);
-  }
-  if(dx.size() != this->d_) {
-    error("Number of arguments does not match number of KERNEL_DX parameters");
-  }
-  for(unsigned i = 0; i < this->d_; ++i) {
-    if(this->sketch_conv_ && w[i] <= 0.0) {
-      error("Gaussian smoothing requires positive WIDTH");
-    }
-    if(kernel && dx[i] < 0.0) {
-      error("Kernel basis requires positive KERNEL_DX");
-    }
-    if(interval_max[i] <= interval_min[i]) {
-      error("INTERVAL_MAX parameters need to be greater than respective INTERVAL_MIN parameters");
-    }
-    this->sketch_basis_.push_back(BasisFunc(std::make_pair(interval_min[i], interval_max[i]), nbasis, w[i], kernel, dx[i]));
-  }
-  if(kernel && this->sketch_conv_) {
-    error("kernel smoothing incompatible with kernel basis");
-  }
-  if(this->walkers_mpi_) {
-    this->mpi_size_ = multi_sim_comm.Get_size();
-    this->mpi_rank_ = multi_sim_comm.Get_rank();
-  }
-
-  parse("SKETCH_UNTIL", this->sketch_until_);
-
-  parseFlag("NONINTRUSIVE", this->nonintrusive_);
-  parseFlag("DETERMINISTIC", this->deterministic_);
-
-  if(getRestart()) {
-    std::string ttfilename = "ttsketch.h5";
-    if(this->walkers_mpi_) {
-      ttfilename = "../" + ttfilename;
-    }
-    while(true) {
-      try {
-        this->vb_ = ttRead(ttfilename, ++this->sketch_count_);
-      } catch(...) {
-        --this->sketch_count_;
-        break;
-      }
-    }
-    if(this->sketch_count_ == 1) {
-      this->vb_ = MPS();
-    }
-    if(getTime() >= this->sketch_until_) {
-      this->frozen_ = true;
-    } else {
-      IFile hills_ifile;
-      if(!hills_ifile.FileExist(this->hillsfname_)) {
-        error("The hills file cannot be found");
-      }
-      hills_ifile.open(this->hillsfname_);
-      readGaussians(&hills_ifile);
-      hills_ifile.close();
-    }
-  }
-
-  if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
-    this->hillsOfile_.link(*this);
-    this->hillsOfile_.enforceSuffix("");
-    this->hillsOfile_.open(this->hillsfname_);
-    if(this->fmt_.length() > 0) {
-      this->hillsOfile_.fmtField(this->fmt_);
-    }
-    hillsOfile_.setHeavyFlush();
-    for(unsigned i = 0; i < this->d_; ++i) {
-      hillsOfile_.setupPrintValue(getPntrToArgument(i));
-    }
-  }
-}
-
-void TTMetaD::readGaussians(IFile *ifile) {
-  std::vector<double> center(this->d_);
-  std::vector<double> sigma(this->d_);
-  double height;
-  int nhills = 0;
-
-  std::vector<Value> tmpvalues;
-  for(unsigned j = 0; j < this->d_; ++j) {
-    tmpvalues.push_back(Value(this, getPntrToArgument(j)->getName(), false));
-  }
-
-  while(scanOneHill(ifile, tmpvalues, center, sigma, height)) {
-    ++nhills;
-    if(this->welltemp_ && this->biasf_ > 1.0) {
-      height *= (this->biasf_ - 1.0) / this->biasf_;
-    }
-    this->hills_.push_back(Gaussian(height, center, sigma));
-  }
-  log << "  restarting from step " << this->sketch_count_ << "\n";
-  log << "  " << nhills << " hills retrieved\n";
-
-  log << "  Bibliography " << plumed.cite("Strand, Yang, Khoo, and Dinner, https://arxiv.org/abs/2603.13549 (2026)");
-  log << plumed.cite("Fishman, White, and Stoudenmire, https://arxiv.org/abs/2007.14822 (2020)");
-  log << "\n";
-}
-
-bool TTMetaD::scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height) {
-  double dummy;
-  if(ifile->scanField("time", dummy)) {
-    unsigned ncv = tmpvalues.size();
-    for(unsigned i = 0; i < ncv; ++i) {
-      ifile->scanField(&tmpvalues[i]);
-      if(tmpvalues[i].isPeriodic() && !getPntrToArgument(i)->isPeriodic()) {
-        error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
-      } else if(tmpvalues[i].isPeriodic()) {
-        std::string imin, imax;
-        tmpvalues[i].getDomain(imin, imax);
-        std::string rmin, rmax;
-        getPntrToArgument(i)->getDomain(rmin, rmax);
-        if(imin != rmin || imax != rmax) {
-          error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
-        }
-      }
-      center[i] = tmpvalues[i].get();
-    }
-    for(unsigned i = 0; i < ncv; ++i) {
-      ifile->scanField("sigma_" + getPntrToArgument(i)->getName(), sigma[i]);
-    }
-
-    ifile->scanField("height", height);
-    ifile->scanField("biasf", dummy);
-    ifile->scanField();
-    return true;
-  } else {
-    return false;
-  }
-}
-
-void TTMetaD::writeGaussian(const Gaussian& hill, OFile&file) {
-  file.printField("time", getTimeStep() * getStep());
-  for(unsigned i = 0; i < this->d_; ++i) {
-    file.printField(getPntrToArgument(i), hill.center[i]);
-  }
-  for(unsigned i = 0; i < this->d_; ++i) {
-    file.printField("sigma_" + getPntrToArgument(i)->getName(), hill.sigma[i]);
-  }
-  double height = hill.height;
-  if(this->welltemp_ && this->biasf_ > 1.0) {
-    height *= this->biasf_ / (this->biasf_ - 1.0);
-  }
-  file.printField("height", height).printField("biasf", this->biasf_);
-  file.printField();
-}
-
-// Called every MD step: evaluate total bias (TT + remaining Gaussians) and set forces.
-void TTMetaD::calculate() {
-  std::vector<double> cv(this->d_);
-  for(unsigned i = 0; i < this->d_; ++i) {
-    cv[i] = getArgument(i);
-  }
-
-  std::vector<double> der(this->d_, 0.0);
-
-  double ene = getBiasAndDerivatives(cv, der);
-  setBias(ene);
-  for(unsigned i = 0; i < this->d_; ++i) {
-    setOutputForce(i, -der[i]);
-  }
-}
-
-// Called every MD step after forces are applied.
-// Two separate triggers:
-//   1. Every sketch_stride_ steps: run paraSketch() to compress accumulated hills into vb_,
-//      clear the hills list, and rewind the HILLS file for the next interval.
-//   2. Every stride_ steps: deposit a new Gaussian hill and write it to the HILLS file.
-void TTMetaD::update() {
-  bool nowAddATT;
-  if(getStep() % this->sketch_stride_ == 0 && !this->isFirstStep_ && !this->frozen_) {
-    nowAddATT = true;
-    if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
-      this->hillsOfile_.flush();
-    }
-  } else {
-    nowAddATT = false;
-  }
-
-  if(nowAddATT) {
-    if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
-      unsigned N = this->hills_.size();
-      log << "Sample limits\n";
-      for(unsigned i = 0; i < this->d_; ++i) {
-        auto [large, small] = this->sketch_basis_[i].dom();
-        for(unsigned j = 0; j < N; ++j) {
-          if(this->hills_[j].center[i] > large) {
-            large = this->hills_[j].center[i];
-          }
-          if(this->hills_[j].center[i] < small) {
-            small = this->hills_[j].center[i];
-          }
-        }
-        log << small << " " << large << "\n";
-      }
-
-      log << "\nEmpirical means:\n";
-      Matrix<double> sigmahat(this->d_, this->d_);
-      std::vector<double> muhat(this->d_, 0.0);
-      for(unsigned k = 0; k < this->d_; ++k) {
-        for(unsigned j = 0; j < N; ++j) {
-          muhat[k] += this->hills_[j].center[k] / N;
-        }
-        log << muhat[k] << " ";
-      }
-      log << "\nEmpirical covariance matrix:\n";
-      for(unsigned k = 0; k < this->d_; ++k) {
-        for(unsigned l = k; l < this->d_; ++l) {
-          sigmahat(k, l) = sigmahat(l, k) = 0.0;
-          for(unsigned j = 0; j < N; ++j) {
-            sigmahat(k, l) += (this->hills_[j].center[k] - muhat[k]) * (this->hills_[j].center[l] - muhat[l]) / (N - 1);
-          }
-          sigmahat(l, k) = sigmahat(k, l);
-        }
-      }
-      matrixOut(log, sigmahat);
-
-      // record current bias at hill centers before update, for computing relative error after
-      std::vector<double> A0(N);
-      Matrix<double> x(N, this->d_);
-      for(unsigned i = 0; i < N; ++i) {
-        for(unsigned j = 0; j < this->d_; ++j) x(i, j) = this->hills_[i].center[j];
-        A0[i] = getBias(this->hills_[i].center);
-      }
-
-      log << "\nStarting TT-sketch...\n";
-      log.flush();
-      paraSketch();
-      ++this->sketch_count_;
-
-      this->hills_.clear();
-
-      // compute relative L2 approximation error at hill centers: ||V_new - V_old|| / ||V_old||
-      std::vector<double> diff(N);
-      for(unsigned i = 0; i < N; ++i) {
-        std::vector<double> xi(this->d_);
-        for(unsigned j = 0; j < this->d_; ++j) xi[j] = x(i, j);
-        diff[i] = getBias(xi);
-      }
-      std::transform(diff.begin(), diff.end(), A0.begin(), diff.begin(), std::minus<double>());
-      log << "Relative l2 error = " << sqrt(norm(diff) / norm(A0)) << "\n\n";
-      log.flush();
-
-      std::string ttfilename = "ttsketch.h5";
-      if(this->walkers_mpi_) {
-        ttfilename = "../" + ttfilename;
-      }
-      ttWrite(ttfilename, this->vb_, this->sketch_count_);
-    }
-
-    if(this->walkers_mpi_) {
-      multi_sim_comm.Bcast(this->sketch_count_, 0);
-      if(this->mpi_rank_ != 0) {
-        this->hills_.clear();
-        this->vb_ = ttRead("../ttsketch.h5", this->sketch_count_);
-      }
-    }
-    if(getTime() >= this->sketch_until_) {
-      this->frozen_ = true;
-    } else if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
-      this->hillsOfile_.rewind();
-      this->hillsOfile_.clearFields();
-      if(this->fmt_.length() > 0) {
-        this->hillsOfile_.fmtField(this->fmt_);
-      }
-      hillsOfile_.setHeavyFlush();
-      for(unsigned i = 0; i < this->d_; ++i) {
-        hillsOfile_.setupPrintValue(getPntrToArgument(i));
-      }
-    }
-  }
-
-  bool nowAddAHill;
-  if(getStep() % this->stride_ == 0 && !isFirstStep_ && !this->frozen_) {
-    nowAddAHill = true;
-  } else {
-    nowAddAHill = false;
-    this->isFirstStep_ = false;
-  }
-
-  std::vector<double> cv(this->d_);
-  for(unsigned i = 0; i < this->d_; ++i) {
-    cv[i] = getArgument(i);
-  }
-
-  if(nowAddAHill) {
-    double height = getHeight(cv);
-
-    if(this->walkers_mpi_) {
-      std::vector<double> all_cv(this->mpi_size_ * this->d_, 0.0);
-      std::vector<double> all_sigma(this->mpi_size_ * this->sigma0_.size(), 0.0);
-      std::vector<double> all_height(this->mpi_size_, 0.0);
-      multi_sim_comm.Allgather(cv, all_cv);
-      multi_sim_comm.Allgather(this->sigma0_, all_sigma);
-      multi_sim_comm.Allgather(height * (this->biasf_ > 1.0 ? this->biasf_ / (this->biasf_ - 1.0) : 1.0), all_height);
-
-      for(int i = 0; i < this->mpi_size_; i++) {
-        std::vector<double> cv_now(this->d_);
-        std::vector<double> sigma_now(this->sigma0_.size());
-        for(unsigned j = 0; j < this->d_; j++) {
-          cv_now[j] = all_cv[i * this->d_ + j];
-        }
-        for(unsigned j = 0; j < this->sigma0_.size(); j++) {
-          sigma_now[j] = all_sigma[i * this->sigma0_.size() + j];
-        }
-        double fact = (this->biasf_ > 1.0 ? (this->biasf_ - 1.0) / this->biasf_ : 1.0);
-        Gaussian newhill(all_height[i] * fact, cv_now, sigma_now);
-        this->hills_.push_back(newhill);
-        if(this->mpi_rank_ == 0) {
-          writeGaussian(newhill, hillsOfile_);
-        }
-      }
-    } else {
-      Gaussian newhill(height, cv, this->sigma0_);
-      this->hills_.push_back(newhill);
-      writeGaussian(newhill, hillsOfile_);
-    }
-  }
-
-  if(getStep() % this->sketch_stride_ == 1 && !this->frozen_) {
-    log << "Vbias update " << this->sketch_count_ << "...\n\n";
-    log.flush();
-  }
-}
-
-double TTMetaD::getHeight(const std::vector<double>& cv) {
-  double height = this->height0_;
-  if(this->welltemp_) {
-    double vbias = getBias(cv);
-    if(this->biasf_ > 1.0) {
-      height = this->height0_ * exp(-vbias / (this->kbt_ * (this->biasf_ - 1.0)));
-    } else {
-      height = this->height0_ * exp(-vbias / this->kbt_);
-    }
-  }
-  return height;
-}
-
-// Total bias = TT approximation of accumulated hills + sum of recently deposited Gaussians.
-double TTMetaD::getBias(const std::vector<double>& cv) {
-  double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
-  unsigned nt = OpenMP::getNumThreads();
-  #pragma omp parallel num_threads(nt)
-  {
-    #pragma omp for reduction(+:bias) nowait
-    for(unsigned i = 0; i < hills_.size(); ++i) {
-      bias += evaluateGaussian(cv, this->hills_[i]);
-    }
-  }
-  return bias;
-}
-
-double TTMetaD::getBiasAndDerivatives(const std::vector<double>& cv, std::vector<double>& der) {
-  double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
-  if(length(this->vb_) != 0) {
-    der = ttGrad(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
-  }
-  unsigned nt = OpenMP::getNumThreads();
-  if(this->hills_.size() < 2 * nt || nt == 1) {
-    std::vector<double> dp(this->d_);
-    for(unsigned i = 0; i < this->hills_.size(); ++i) {
-      bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], der, dp);
-    }
-  } else {
-    #pragma omp parallel num_threads(nt)
-    {
-      std::vector<double> omp_deriv(this->d_, 0.0);
-      std::vector<double> dp(this->d_);
-      #pragma omp for reduction(+:bias) nowait
-      for(unsigned i = 0; i < this->hills_.size(); ++i) {
-        bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], omp_deriv, dp);
-      }
-      #pragma omp critical
-      for(unsigned i = 0; i < this->d_; ++i) {
-        der[i] += omp_deriv[i];
-      }
-    }
-  }
-  return bias;
-}
-
-double TTMetaD::evaluateGaussian(const std::vector<double>& cv, const Gaussian& hill) {
-  double dp2 = 0.0;
-  for(unsigned i = 0; i < this->d_; i++) {
-    double dp = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
-    dp2 += dp * dp;
-  }
-  dp2 *= 0.5;
-
-  double bias = 0.0;
-  if(dp2 < dp2cutoff) {
-    bias = hill.height * exp(-dp2);
-  }
-
-  return bias;
-}
-
-double TTMetaD::evaluateGaussianAndDerivatives(const std::vector<double>& cv, const Gaussian& hill, std::vector<double>& der, std::vector<double>& dp) {
-  double dp2 = 0.0;
-  double bias = 0.0;
-  for(unsigned i = 0; i < this->d_; i++) {
-    dp[i] = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
-    dp2 += dp[i] * dp[i];
-  }
-  dp2 *= 0.5;
-  if(dp2 < dp2cutoff) {
-    bias = hill.height * exp(-dp2);
-    for(unsigned i = 0; i < this->d_; i++) {
-      der[i] -= bias * dp[i] * hill.invsigma[i];
-    }
-  }
-
-  return bias;
-}
-
-// TT-Sketch algorithm: compresses the N accumulated Gaussian hills plus the previous
-// TT bias vb_ into a new low-rank TT approximation G, which becomes the updated vb_.
-//
-// Overview (see paper Algorithm 2):
-//   1. createTTCoeff(): build a random sketch coefficient TT `coeff` of rank sketch_rc_.
-//   2. intBasisSample(): compute M[i][j,k] = <phi_k, g_j>_{dim i}, the inner product of
-//      each basis function with each Gaussian along dimension i.
-//   3. formTensorMoment(): contract coeff with M to get the tensor moment Bemp and
-//      left/right environment tensors envi_L, envi_R needed to form the normal equations.
-//   4. For each bond between cores k and k+1:
-//      - Form the cross-gram matrix A from envi_L and envi_R.
-//      - SVD A to get the rank-trimmed projection V (truncating to sketch_r_ or sketch_cutoff_).
-//   5. Recover TT cores G from Bemp and the pseudo-inverse of U*S.
-//   6. Apply Gram matrix correction (kernel basis only) to convert to dual basis coefficients.
-void TTMetaD::paraSketch() {
-  unsigned N = this->hills_.size();
-  auto coeff = createTTCoeff();
-  auto [M, is] = intBasisSample(siteInds(coeff));
-  MPS G(this->d_);
-
-  auto [Bemp, envi_L, envi_R] = formTensorMoment(M, coeff, is);
-  MPS Bemp_Vb;
-  std::vector<ITensor> envi_L_Vb;
-  std::vector<ITensor> envi_R_Vb;
-  if(this->sketch_count_ != 1) {
-    if(this->nonintrusive_) {
-      // Non-intrusive variant: accumulate tensor moments directly across sketches.
-      // B_prev_ stores the sum of Bemp from all prior intervals; re-index its bond/site
-      // indices to match the current coeff TT's indices before adding.
-      auto sites = siteInds(Bemp);
-      auto sites_prev = siteInds(this->B_prev_);
-      auto links = linkInds(Bemp);
-      auto links_prev = linkInds(this->B_prev_);
-      for(unsigned i = 1; i <= this->d_; ++i) {
-        this->B_prev_.ref(i) *= delta(sites(i), sites_prev(i));
-        if(i != 1) {
-          this->B_prev_.ref(i) *= delta(links(i - 1), links_prev(i - 1));
-        }
-        if (i != this->d_) {
-          this->B_prev_.ref(i) *= delta(links(i), links_prev(i));
-        }
-        Bemp.ref(i) += this->B_prev_(i);
-      }
-    } else {
-      // Intrusive variant: project the previous TT bias vb_ onto the current sketch's
-      // basis by pre-multiplying each core by the Gram matrix (kernel basis) or the
-      // convolution inner-product matrix (Fourier conv mode), then add its tensor moment.
-      if(this->sketch_basis_[0].kernel()) {
-        // Convert vb_ from dual basis to standard basis by applying Gram matrix per core
-        for(unsigned i = 1; i <= this->d_; ++i) {
-          auto s = siteIndex(this->vb_, i);
-          ITensor gram(s, prime(s));
-          for(int j = 1; j <= dim(s); ++j) {
-            for(int l = 1; l <= dim(s); ++l) {
-              gram.set(s = j, prime(s) = l, this->sketch_basis_[i - 1].gram()(j - 1, l - 1));
-            }
-          }
-          this->vb_.ref(i) *= gram;
-          this->vb_.ref(i).noPrime();
-        }
-      }
-      if(this->sketch_conv_) {
-        // Apply Fourier convolution damping factors to vb_ to account for basis smoothing
-        for(unsigned i = 1; i <= this->d_; ++i) {
-          double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
-          double w = this->sketch_basis_[i - 1].w();
-          auto s = siteIndex(this->vb_, i);
-          ITensor inner(s, prime(s));
-          for(int j = 1; j < dim(s); ++j) {
-            inner.set(s = j, prime(s) = j, exp(-pow(M_PI * w * (j / 2), 2) / (2 * pow(L, 2))));
-          }
-          this->vb_.ref(i) *= inner;
-          this->vb_.ref(i).noPrime();
-        }
-      }
-      auto vbresult = formTensorMomentVb(coeff);
-      Bemp_Vb = std::get<0>(vbresult);
-      envi_L_Vb = std::get<1>(vbresult);
-      envi_R_Vb = std::get<2>(vbresult);
-      for(unsigned i = 1; i <= this->d_; ++i) {
-        Bemp.ref(i) += Bemp_Vb(i);
-      }
-    }
-  }
-  if(this->nonintrusive_) {
-    this->B_prev_ = Bemp;
-    if(this->sketch_count_ == 1) {
-      this->A_prev_.resize(this->d_ - 1);
-    }
-  }
-  // For each bond between cores k-1 and k, form the cross-gram matrix A = L^T * R,
-  // where L = envi_L[k] and R = envi_R[k-2] are the left/right environment projections
-  // of coeff onto the sample indices. SVD A to find the rank-trimmed subspace V.
-  auto links = linkInds(coeff);
-  std::vector<ITensor> U(this->d_), S(this->d_), V(this->d_);
-  std::vector<Index> links_trimmed;
-  for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
-    int rank = dim(links(core_id - 1));
-    Matrix<double> LMat(N, rank), RMat(N, rank);
-    for(unsigned i = 1; i <= N; ++i) {
-      for(int j = 1; j <= rank; ++j) {
-        LMat(i - 1, j - 1) = envi_L[core_id - 1].elt(is(core_id) = i, links(core_id - 1) = j);
-        RMat(i - 1, j - 1) = envi_R[core_id - 2].elt(is(core_id - 1) = i, links(core_id - 1) = j);
-      }
-    }
-    Matrix<double> Lt, AMat, PMat, AMat_Vb;
-    transpose(LMat, Lt);
-    mult(Lt, RMat, AMat);
-
-    if(this->sketch_count_ != 1 && !this->nonintrusive_) {
-      auto ivb = linkIndex(this->vb_, core_id - 1);
-      int rank_vb = dim(ivb);
-      LMat = Matrix<double>(rank_vb, rank);
-      RMat = Matrix<double>(rank_vb, rank);
-      for(int i = 1; i <= rank_vb; ++i) {
-        for(int j = 1; j <= rank; ++j) {
-          LMat(i - 1, j - 1) = envi_L_Vb[core_id - 1].elt(ivb = i, links(core_id - 1) = j);
-          RMat(i - 1, j - 1) = envi_R_Vb[core_id - 2].elt(ivb = i, links(core_id - 1) = j);
-        }
-      }
-      transpose(LMat, Lt);
-      mult(Lt, RMat, AMat_Vb);
-    }
-
-    ITensor A(prime(links(core_id - 1)), links(core_id - 1));
-    for(int i = 1; i <= rank; ++i) {
-      for(int j = 1; j <= rank; ++j) {
-        A.set(prime(links(core_id - 1)) = i, links(core_id - 1) = j, AMat(i - 1, j - 1));
-      }
-    }
-    if(this->sketch_count_ != 1) {
-      if(this->nonintrusive_) {
-        this->A_prev_[core_id - 2] *= delta(this->A_prev_[core_id - 2].index(1), A.index(1));
-        this->A_prev_[core_id - 2] *= delta(this->A_prev_[core_id - 2].index(2), A.index(2));
-        A += this->A_prev_[core_id - 2];
-      } else {
-        ITensor A_Vb(prime(links(core_id - 1)), links(core_id - 1));
-        for(int i = 1; i <= rank; ++i) {
-          for(int j = 1; j <= rank; ++j) {
-            A_Vb.set(prime(links(core_id - 1)) = i, links(core_id - 1) = j, AMat_Vb(i - 1, j - 1));
-          }
-        }
-        A += A_Vb;
-      }
-    }
-    if(this->nonintrusive_) {
-      this->A_prev_[core_id - 2] = A;
-    }
-    auto original_link_tags = tags(links(core_id - 1));
-    V[core_id - 1] = ITensor(links(core_id - 1));
-    if(this->sketch_r_ > 0) {
-      svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1],
-      {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags, "MaxDim=", this->sketch_r_});
-    } else {
-      svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1], {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags});
-    }
-    links_trimmed.push_back(commonIndex(S[core_id - 1], V[core_id - 1]));
-  }
-
-  // Recover TT cores G from Bemp and the trimmed subspaces.
-  // G[1] = Bemp[1] * V[1] (project first core onto rank-trimmed subspace)
-  // G[k] = pinv(U[k]*S[k]) * Bemp[k] * V[k]  for 2 <= k < d
-  // G[d] = pinv(U[d]*S[d]) * Bemp[d]
-  G.ref(1) = Bemp(1) * V[1];
-  for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
-    int rank = dim(links(core_id - 1)), rank_trimmed = dim(links_trimmed[core_id - 2]);
-    ITensor A = U[core_id - 1] * S[core_id - 1];
-    ITensor Pinv(links_trimmed[core_id - 2], links(core_id - 1));
-    Matrix<double> AMat(rank, rank_trimmed), PMat;
-    for(int i = 1; i <= rank; ++i) {
-      for(int j = 1; j <= rank_trimmed; ++j) {
-        AMat(i - 1, j - 1) = A.elt(prime(links(core_id - 1)) = i, links_trimmed[core_id - 2] = j);
-      }
-    }
-    pseudoInvert(AMat, PMat);
-
-    for(int i = 1; i <= rank_trimmed; ++i) {
-      for(int j = 1; j <= rank; ++j) {
-        Pinv.set(links_trimmed[core_id - 2] = i, links(core_id - 1) = j, PMat(i - 1, j - 1));
-      }
-    }
-    G.ref(core_id) = Pinv * Bemp(core_id);
-    if(core_id != this->d_) {
-      G.ref(core_id) *= V[core_id];
-    }
-  }
-
-  log << "Final ranks ";
-  for(unsigned i = 1; i < this->d_; ++i) {
-    log << dim(linkIndex(G, i)) << " ";
-  }
-  log << "\n";
-  log.flush();
-
-  if(this->sketch_basis_[0].kernel()) {
-    // Apply pseudo-inverse of the Gram matrix to each core to convert from the
-    // primal Gram representation back to dual basis coefficients (G^+ * G_core).
-    for(unsigned i = 1; i <= this->d_; ++i) {
-      auto s = siteIndex(G, i);
-      ITensor ginv(s, prime(s));
-      for(int j = 1; j <= dim(s); ++j) {
-        for(int l = 1; l <= dim(s); ++l) {
-          ginv.set(s = j, prime(s) = l, this->sketch_basis_[i - 1].ginv()(j - 1, l - 1));
-        }
-      }
-      G.ref(i) *= ginv;
-      G.ref(i).noPrime();
-    }
-  }
-
-  this->vb_ = G;
-}
-
-// Build the random sketch coefficient TT `coeff` of bond dimension sketch_rc_.
-// Each element is drawn from N(0,1); then each core is multiplied by a diagonal
-// matrix diag(1, alpha, alpha, ...) so the constant basis function (pos=1) has
-// full weight while all other harmonics are scaled down by sketch_alpha_. This
-// ensures the sketch captures the large constant component of the bias accurately.
-MPS TTMetaD::createTTCoeff() const {
-  std::default_random_engine generator(this->deterministic_ ? 42u : static_cast<unsigned int>(time(nullptr)));
-  std::normal_distribution<double> distribution(0.0, 1.0);
-  int n = this->sketch_basis_[0].nbasis();
-  auto sites = SiteSet(this->d_, n);
-  auto coeff = MPS(sites, this->sketch_rc_);
-  for(int j = 1; j <= n; ++j) {
-    for(int k = 1; k <= this->sketch_rc_; ++k) {
-      coeff.ref(1).set(sites(1) = j, linkIndex(coeff, 1) = k, distribution(generator));
-    }
-  }
-  for(unsigned i = 2; i <= this->d_ - 1; ++i) {
-    for(int j = 1; j <= n; ++j) {
-      for(int k = 1; k <= this->sketch_rc_; ++k) {
-        for(int l = 1; l <= this->sketch_rc_; ++l) {
-          coeff.ref(i).set(sites(i) = j, linkIndex(coeff, i - 1) = k, linkIndex(coeff, i) = l, distribution(generator));
-        }
-      }
-    }
-  }
-  for(int j = 1; j <= n; ++j) {
-    for(int k = 1; k <= this->sketch_rc_; ++k) {
-      coeff.ref(this->d_).set(sites(this->d_) = j, linkIndex(coeff, this->d_ - 1) = k, distribution(generator));
-    }
-  }
-  for(unsigned i = 1; i <= this->d_; ++i) {
-    auto s = sites(i);
-    auto sp = prime(s);
-    std::vector<double> Avec(n, this->sketch_alpha_);
-    Avec[0] = 1.0;
-    auto A = diagITensor(Avec, s, sp);
-    coeff.ref(i) *= A;
-    coeff.ref(i).noPrime();
-  }
-  return coeff;
-}
-
-// Compute the matrix M[dim i][sample j, basis k] = <phi_k, g_j>_{dim i}, the
-// analytical inner product of each 1D basis function with the 1D marginal of each
-// Gaussian hill along dimension i. The d-dimensional Gaussian factorizes as a
-// product of d 1D Gaussians, so we distribute height as h^(1/d) per dimension.
-// These integrals are computed in closed form:
-//   - Kernel basis: convolution of the kernel function with the 1D Gaussian hill
-//   - Fourier basis: Fourier transform of the 1D Gaussian (exponential decay * harmonic)
-// Returns M as a vector of ITensors (one per dimension) and a new IndexSet is_new
-// where is_new(i) indexes the N sample points for dimension i.
-std::pair<std::vector<ITensor>, IndexSet> TTMetaD::intBasisSample(const IndexSet& is) const {
-  unsigned N = this->hills_.size();
-  int nb = this->sketch_basis_[0].nbasis();
-  auto sites_new = SiteSet(this->d_, N);
-  std::vector<ITensor> M;
-  std::vector<Index> is_new;
-  for(unsigned i = 1; i <= this->d_; ++i) {
-    double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
-    double a = (this->sketch_basis_[i - 1].dom().second + this->sketch_basis_[i - 1].dom().first) / 2;
-    M.push_back(ITensor(sites_new(i), is(i)));
-    is_new.push_back(sites_new(i));
-    for(unsigned j = 1; j <= N; ++j) {
-      double x = this->hills_[j - 1].center[i - 1];
-      double w = this->hills_[j - 1].sigma[i - 1];
-      double h = pow(this->hills_[j - 1].height, 1.0 / this->d_);  // per-dimension d-th root of height
-      for(int pos = 1; pos <= nb; ++pos) {
-        double result = 0.0;
-        if(this->sketch_basis_[0].kernel()) {
-          if(pos == 1) {
-            // <1, g_j> = integral of 1D Gaussian = sqrt(2*pi)*sigma
-            result = h * sqrt(2 * M_PI) * w;
-          } else {
-            // <kernel_c, g_j> = conv of two Gaussians (widths dx and w), with periodic images
-            double c = this->sketch_basis_[i - 1].center(pos - 1);
-            double dx = this->sketch_basis_[i - 1].dx();
-            for(int k = -1; k <= 1; ++k) {
-              result += exp(-pow(x - c + 2 * k * L, 2) / (2 * (pow(dx, 2) + pow(w, 2)))) * h * sqrt(2 * M_PI) * w /
-                        (sqrt(1 / pow(dx, 2) + 1 / pow(w, 2)) * w);
-            }
-          }
-        } else {
-          if(pos == 1) {
-            // <1/sqrt(2L), g_j> = sqrt(pi/L) * sigma (Gaussian times constant)
-            result = h * sqrt(M_PI / L) * w;
-          } else if(pos % 2 == 0) {
-            // <cos harmonic, g_j>: Fourier transform of Gaussian times exponential damping
-            result = exp(-pow(M_PI * w * (pos / 2), 2) / (2 * pow(L, 2))) * h * sqrt(2 * M_PI / L) * w * cos(M_PI * (x - a) * (pos / 2) / L);
-          } else {
-            result = exp(-pow(M_PI * w * (pos / 2), 2) / (2 * pow(L, 2))) * h * sqrt(2 * M_PI / L) * w * sin(M_PI * (x - a) * (pos / 2) / L);
-          }
-        }
-        M.back().set(sites_new(i) = j, is(i) = pos, result);
-      }
-    }
-  }
-  return std::make_pair(M, IndexSet(is_new));
-}
-
-// Compute the tensor moment B and environment tensors for the new Gaussian hills.
-//
-// First, L = coeff with each core's basis index replaced by the sample index via M:
-//   L[i] = coeff[i] * M[i], so L has (sample_index, bond_left, bond_right) indices.
-//
-// envi_L[i] = partial contraction of L(1)...L(i) summed over sample indices 1..N,
-//   yielding shape (sample_index_{i+1}, link_i). Used to form the left half of A.
-// envi_R[i] = partial contraction of L(i+2)...L(d) summed over sample indices,
-//   yielding shape (sample_index_{i+1}, link_{i+1}). Used to form the right half of A.
-//
-// B[k] = sum_j envi_L[k-1][j, :] * envi_R[k-1][j, :] * M[k][:, basis_k],
-//   which is the "tensor moment" for core k: the sketch of the Gaussian sum projected
-//   onto the basis functions of dimension k and the sketch subspaces of all other dims.
-std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorMoment(const std::vector<ITensor>& M, const MPS& coeff, const IndexSet& is) {
-  int N = dim(is(1));
-  auto links = linkInds(coeff);
-  // L[i] = coeff[i] with basis index contracted against M[i] -> sample index
-  auto L = coeff;
-
-  for(unsigned i = 1; i <= this->d_; ++i) {
-    L.ref(i) *= M[i - 1];
-  }
-
-  // Build left environments: envi_L[i] accumulates the contraction of L(1)..L(i)
-  // over the shared sample index, leaving the next sample index and link free.
-  std::vector<ITensor> envi_L(this->d_);
-  envi_L[1] = L(1) * delta(is(1), is(2));
-  for(unsigned i = 2; i < this->d_; ++i) {
-    int rankl = dim(links(i - 1));
-    int rankr = dim(links(i));
-    envi_L[i] = ITensor(is(i + 1), links(i));
-    for(int j = 1; j <= N; ++j) {
-      for(int k = 1; k <= rankr; ++k) {
-        ITensor LHS(links(i - 1)), RHS(links(i - 1));
-        for(int ii = 1; ii <= rankl; ++ii) {
-          LHS.set(links(i - 1) = ii, envi_L[i - 1].elt(is(i) = j, links(i - 1) = ii));
-          RHS.set(links(i - 1) = ii, L(i).elt(links(i - 1) = ii, is(i) = j, links(i) = k));
-        }
-        envi_L[i].set(is(i + 1) = j, links(i) = k, elt(LHS * RHS));
-      }
-    }
-  }
-
-  // Build right environments: envi_R[i] accumulates L(i+2)..L(d) right-to-left.
-  std::vector<ITensor> envi_R(this->d_);
-  envi_R[this->d_ - 2] = L(this->d_) * delta(is(this->d_), is(this->d_ - 1));
-  for(int i = this->d_ - 3; i >= 0; --i) {
-    int rankl = dim(links(i + 1));
-    int rankr = dim(links(i + 2));
-    envi_R[i] = ITensor(is(i + 1), links(i + 1));
-    for(int j = 1; j <= N; ++j) {
-      for(int k = 1; k <= rankl; ++k) {
-        ITensor LHS(links(i + 2)), RHS(links(i + 2));
-        for(int ii = 1; ii <= rankr; ++ii) {
-          LHS.set(links(i + 2) = ii, envi_R[i + 1].elt(is(i + 2) = j, links(i + 2) = ii));
-          RHS.set(links(i + 2) = ii, L(i + 2).elt(links(i + 2) = ii, is(i + 2) = j, links(i + 1) = k));
-        }
-        envi_R[i].set(is(i + 1) = j, links(i + 1) = k, elt(LHS * RHS));
-      }
-    }
-  }
-
-  // Assemble tensor moment B: for each core, combine the left and right environments
-  // element-wise over sample indices, then contract with M to restore basis indices.
-  MPS B(this->d_);
-  B.ref(1) = envi_R[0] * M[0];
-  for(unsigned core_id = 2; core_id < this->d_; ++core_id) {
-    int rankl = dim(links(core_id - 1));
-    int rankr = dim(links(core_id));
-    B.ref(core_id) = ITensor(links(core_id - 1), is(core_id), links(core_id));
-    for(int i = 1; i <= rankl; ++i) {
-      for(int j = 1; j <= rankr; ++j) {
-        for(int k = 1; k <= N; ++k) {
-          double Lelt = envi_L[core_id - 1].elt(is(core_id) = k, links(core_id - 1) = i);
-          double Relt = envi_R[core_id - 1].elt(is(core_id) = k, links(core_id) = j);
-          B.ref(core_id).set(links(core_id - 1) = i, is(core_id) = k, links(core_id) = j, Lelt * Relt);
-        }
-      }
-    }
-    B.ref(core_id) *= M[core_id - 1];
-  }
-  B.ref(this->d_) = envi_L[this->d_ - 1] * M[this->d_ - 1];
-
-  return std::make_tuple(B, envi_L, envi_R);
-}
-
-// Compute the tensor moment B and environments for the previous TT bias vb_.
-// This is the intrusive variant's contribution from the prior accumulated bias.
-// Analogous to formTensorMoment but uses vb_ in place of M*coeff: the "samples"
-// are the vb_ bond indices, and the environments are formed by contracting coeff
-// with vb_ core by core. The result has the same structure as Bemp from the new hills.
-std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorMomentVb(const MPS& coeff) {
-  // align vb_ site indices with coeff's site indices for contraction
-  for(unsigned i = 1; i <= this->d_; ++i) {
-    this->vb_.ref(i) *= delta(siteIndex(this->vb_, i), siteIndex(coeff, i));
-  }
-  std::vector<ITensor> envi_L(this->d_);
-  envi_L[1] = coeff(1) * this->vb_(1);
-  for(unsigned i = 2; i < this->d_; ++i) {
-    envi_L[i] = envi_L[i - 1] * coeff(i) * this->vb_(i);
-  }
-
-  std::vector<ITensor> envi_R(this->d_);
-  envi_R[this->d_ - 2] = coeff(this->d_) * this->vb_(this->d_);
-  for(int i = this->d_ - 3; i >= 0; --i) {
-    envi_R[i] = envi_R[i + 1] * coeff(i + 2) * this->vb_(i + 2);
-  }
-
-  MPS B(this->d_);
-  B.ref(1) = this->vb_(1) * envi_R[0];
-  for(unsigned core_id = 2; core_id < this->d_; ++core_id) {
-    B.ref(core_id) = envi_L[core_id - 1] * this->vb_(core_id) * envi_R[core_id - 1];
-  }
-  B.ref(this->d_) = envi_L[this->d_ - 1] * this->vb_(this->d_);
-
-  return std::make_tuple(B, envi_L, envi_R);
-}
-
-}
-}
-#endif // __PLUMED_HAS_LIBITENSOR
+/* Copyright (c) 2026, Nils E. Strand, Siyao Yang, Yuehaw Khoo, and Aaron R. Dinner
+ *
+ * Tensor train (TT)-metadynamics.
+ * https://arxiv.org/abs/2603.13549
+ * See module.md for installation instructions.
+ */
+
+#include "TTHelper.h"
+#if defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
+#include "bias/Bias.h"
+#include "core/ActionRegister.h"
+#include "core/ActionSet.h"
+#include "core/PlumedMain.h"
+#include "tools/Exception.h"
+#include "tools/Communicator.h"
+#include "tools/File.h"
+#include "tools/OpenMP.h"
+
+using namespace itensor;
+using namespace PLMD::bias;
+
+namespace PLMD {
+namespace ttsketch {
+
+class TTMetaD : public Bias {
+
+private:
+  struct Gaussian {
+    double height;
+    std::vector<double> center;
+    std::vector<double> sigma;
+    std::vector<double> invsigma;
+    Gaussian(const double h, const std::vector<double>& c, const std::vector<double>& s)
+      : height(h), center(c), sigma(s), invsigma(s) {
+      for(unsigned i = 0; i < invsigma.size(); ++i) {
+        if(abs(invsigma[i]) > 1.e-20) {
+          invsigma[i] = 1.0 / invsigma[i];
+        } else {
+          invsigma[i] = 0.0;
+        }
+      }
+    }
+  };
+  // standard metadynamics parameters
+  double kbt_;
+  int stride_;
+  bool welltemp_;
+  double biasf_;
+  std::string fmt_;
+  bool isFirstStep_;
+  double height0_;
+  std::vector<double> sigma0_;
+  std::vector<Gaussian> hills_;   // Gaussians deposited since the last TT update
+  OFile hillsOfile_;
+  std::string mw_dir_;
+  std::string hillsfname_;
+  bool walkers_mpi_;
+  int mpi_size_;
+  int mpi_rank_;
+  // TT-sketch parameters
+  unsigned d_;               // number of CV dimensions
+  int sketch_rc_;            // initial (sketch) rank for the random TT coefficient tensor
+  int sketch_r_;             // target TT bond dimension after SVD truncation (0 = use cutoff)
+  double sketch_cutoff_;     // relative SVD truncation threshold (used when sketch_r_=0)
+  int sketch_stride_;        // steps between TT bias updates (tau in the paper)
+  double sketch_alpha_;      // weight for non-constant basis functions in the sketch coefficient TT
+  std::vector<BasisFunc> sketch_basis_;  // one BasisFunc per CV dimension
+  unsigned sketch_count_;    // number of TT updates performed so far (starts at 1, increments after each update)
+  MPS vb_;                   // current TT approximation of the accumulated bias (V_bias^TT)
+  double sketch_until_;      // simulation time after which the bias is frozen (no further updates)
+  bool frozen_;              // true once the bias has been frozen (after sketch_until_)
+  bool sketch_conv_;         // true if Gaussian smoothing (conv mode) is active for basis evaluation
+  bool nonintrusive_;        // if true, uses the intrusive-free variant: accumulates Bemp directly
+  // instead of re-projecting the previous vb_ through the Gram matrix
+  bool deterministic_;       // if true, use a fixed RNG seed (42) in createTTCoeff() for reproducibility
+  MPS B_prev_;               // accumulated tensor moment from all previous sketches (nonintrusive only)
+  std::vector<ITensor> A_prev_;   // accumulated cross-gram matrices per bond (nonintrusive only)
+
+  void readGaussians(IFile *ifile);
+  void writeGaussian(const Gaussian& hill, OFile& file);
+  double getHeight(const std::vector<double>& cv);
+  double getBias(const std::vector<double>& cv);
+  double getBiasAndDerivatives(const std::vector<double>& cv, std::vector<double>& der);
+  double evaluateGaussian(const std::vector<double>& cv, const Gaussian& hill);
+  double evaluateGaussianAndDerivatives(const std::vector<double>& cv, const Gaussian& hill, std::vector<double>& der, std::vector<double>& dp);
+  bool scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height);
+  void paraSketch();
+  MPS createTTCoeff() const;
+  std::pair<std::vector<ITensor>, IndexSet> intBasisSample(const IndexSet& is) const;
+  std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> formTensorMoment(const std::vector<ITensor>& M, const MPS& coeff, const IndexSet& is);
+  std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> formTensorMomentVb(const MPS& coeff);
+
+public:
+  explicit TTMetaD(const ActionOptions&);
+  void calculate() override;
+  void update() override;
+  static void registerKeywords(Keywords& keys);
+};
+
+PLUMED_REGISTER_ACTION(TTMetaD, "TTMETAD")
+
+void TTMetaD::registerKeywords(Keywords& keys) {
+  Bias::registerKeywords(keys);
+  keys.addFlag("KERNEL_BASIS", false, "Specifies that local kernel basis should be used instead of Fourier basis");
+  keys.add("compulsory", "SIGMA", "the widths of the Gaussian hills");
+  keys.add("compulsory", "PACE", "the frequency for hill addition");
+  keys.add("compulsory", "FILE", "HILLS", "a file in which the list of added hills is stored");
+  keys.add("compulsory", "HEIGHT", "the heights of the Gaussian hills");
+  keys.add("optional", "FMT", "specify format for HILLS files (useful for decrease the number of digits in regtests)");
+  keys.add("optional", "BIASFACTOR", "use well tempered metadynamics and use this bias factor. Please note you must also specify temp");
+  keys.add("optional", "TEMP", "the system temperature - this is only needed if you are doing well-tempered metadynamics");
+  keys.addFlag("WALKERS_MPI", false, "To be used when gromacs + multiple walkers are used");
+  keys.add("optional", "WALKERS_DIR", "shared directory with the hills files from all the walkers");
+  keys.use("RESTART");
+  keys.add("optional", "SKETCH_RANK", "Target rank for TTSketch algorithm - compulsory if SKETCH_CUTOFF is not specified");
+  keys.add("optional", "SKETCH_CUTOFF", "Truncation error cutoff for singular value decomposition - compulsory if SKETCH_RANK is not specified");
+  keys.add("compulsory", "SKETCH_INITRANK", "Initial rank for TTSketch algorithm");
+  keys.add("compulsory", "SKETCH_PACE", "1e6", "The frequency for TT Vbias updates");
+  keys.add("compulsory", "INTERVAL_MIN", "Lower limits, outside the limits the system will not feel the biasing force");
+  keys.add("compulsory", "INTERVAL_MAX", "Upper limits, outside the limits the system will not feel the biasing force");
+  keys.add("compulsory", "SKETCH_NBASIS", "20", "Number of basis functions per dimension");
+  keys.add("compulsory", "SKETCH_ALPHA", "0.05", "Weight coefficient for random tensor train construction");
+  keys.add("optional", "SKETCH_UNTIL", "After this time, the bias potential freezes");
+  keys.add("optional", "SKETCH_WIDTH", "Width of Gaussian kernels for smoothing");
+  keys.add("optional", "KERNEL_DX", "Width of basis function kernels");
+  keys.addFlag("NONINTRUSIVE", false, "Sketching uses previous exact sum of Gaussians instead of TT approximation");
+  keys.addFlag("DETERMINISTIC", false, "Use a fixed random seed for TT sketch construction, ensuring reproducible results. Intended for regression testing.");
+}
+
+TTMetaD::TTMetaD(const ActionOptions& ao):
+  PLUMED_BIAS_INIT(ao),
+  kbt_(0.0),
+  stride_(0),
+  welltemp_(false),
+  biasf_(-1.0),
+  isFirstStep_(true),
+  height0_(std::numeric_limits<double>::max()),
+  mw_dir_(""),
+  walkers_mpi_(false),
+  mpi_size_(0),
+  mpi_rank_(0),
+  sketch_r_(0),
+  sketch_cutoff_(0.0),
+  sketch_count_(1),
+  sketch_until_(std::numeric_limits<double>::max()),
+  frozen_(false),
+  sketch_conv_(false),
+  nonintrusive_(false),
+  deterministic_(false) {
+  bool kernel;
+  parseFlag("KERNEL_BASIS", kernel);
+  this->d_ = getNumberOfArguments();
+  if(this->d_ < 2) {
+    error("Number of arguments must be at least 2");
+  }
+  parse("FMT", this->fmt_);
+  parseVector("SIGMA", this->sigma0_);
+  if(this->sigma0_.size() != d_) {
+    error("number of arguments does not match number of SIGMA parameters");
+  }
+  parse("HEIGHT", this->height0_);
+  parse("PACE", this->stride_);
+  if(stride_ <= 0) {
+    error("frequency for hill addition is nonsensical");
+  }
+  this->hillsfname_ = "HILLS";
+  parse("FILE", this->hillsfname_);
+  parse("BIASFACTOR", this->biasf_);
+  if(this->biasf_ < 1.0 && this->biasf_ != -1.0) {
+    error("well tempered bias factor is nonsensical");
+  }
+  this->kbt_ = getkBT();
+  if(this->biasf_ >= 1.0) {
+    if(this->kbt_ == 0.0) {
+      error("Unless the MD engine passes the temperature to plumed, with well-tempered metad you must specify it using TEMP");
+    }
+    this->welltemp_ = true;
+  }
+
+  parseFlag("WALKERS_MPI", this->walkers_mpi_);
+  parse("WALKERS_DIR", this->mw_dir_);
+  if(this->walkers_mpi_ && this->mw_dir_== "") {
+    const std::string ret = std::filesystem::current_path();
+    this->mw_dir_ = ret + "/";
+    multi_sim_comm.Bcast(this->mw_dir_, 0);
+  }
+  if(this->walkers_mpi_ && this->mw_dir_ != "") {
+    this->hillsfname_ = this->mw_dir_ + "/" + this->hillsfname_;
+  }
+
+  parse("SKETCH_RANK", this->sketch_r_);
+  parse("SKETCH_CUTOFF", this->sketch_cutoff_);
+  if(this->sketch_r_ <= 0 && (this->sketch_cutoff_ <= 0.0 || this->sketch_cutoff_ > 1.0)) {
+    error("Valid SKETCH_RANK or SKETCH_CUTOFF needs to be specified");
+  }
+  parse("SKETCH_INITRANK", this->sketch_rc_);
+  if(this->sketch_rc_ <= 0) {
+    error("SKETCH_INITRANK must be positive");
+  }
+  parse("SKETCH_PACE", this->sketch_stride_);
+  if(this->sketch_stride_ <= 0) {
+    error("SKETCH_PACE must be positive");
+  }
+  std::vector<double> interval_min;
+  parseVector("INTERVAL_MIN", interval_min);
+  if(interval_min.size() != this->d_) {
+    error("Number of arguments does not match number of INTERVAL_MIN parameters");
+  }
+  std::vector<double> interval_max;
+  parseVector("INTERVAL_MAX", interval_max);
+  if(interval_max.size() != this->d_) {
+    error("Number of arguments does not match number of INTERVAL_MAX parameters");
+  }
+  int nbasis = 20;
+  parse("SKETCH_NBASIS", nbasis);
+  if(nbasis <= 1) {
+    error("SKETCH_NBASIS must be greater than 1");
+  }
+  if(!kernel && nbasis % 2 == 0) {
+    ++nbasis;
+  }
+  parse("SKETCH_ALPHA", this->sketch_alpha_);
+  if(this->sketch_alpha_ <= 0.0 || this->sketch_alpha_ > 1.0) {
+    error("SKETCH_ALPHA must be positive and no greater than 1");
+  }
+  std::vector<double> w;
+  parseVector("SKETCH_WIDTH", w);
+  if(w.size() == 0) {
+    w.assign(this->d_, 0.0);
+  }
+  if(w.size() != this->d_) {
+    error("Number of arguments does not match number of SKETCH_WIDTH parameters");
+  }
+  for (double val : w) {
+    if (val != 0.0) {
+      this->sketch_conv_ = true;
+    }
+  }
+  std::vector<double> dx;
+  parseVector("KERNEL_DX", dx);
+  if(dx.size() == 0) {
+    dx.resize(this->d_, 0.0);
+  }
+  if(dx.size() != this->d_) {
+    error("Number of arguments does not match number of KERNEL_DX parameters");
+  }
+  for(unsigned i = 0; i < this->d_; ++i) {
+    if(this->sketch_conv_ && w[i] <= 0.0) {
+      error("Gaussian smoothing requires positive WIDTH");
+    }
+    if(kernel && dx[i] < 0.0) {
+      error("Kernel basis requires positive KERNEL_DX");
+    }
+    if(interval_max[i] <= interval_min[i]) {
+      error("INTERVAL_MAX parameters need to be greater than respective INTERVAL_MIN parameters");
+    }
+    this->sketch_basis_.push_back(BasisFunc(std::make_pair(interval_min[i], interval_max[i]), nbasis, w[i], kernel, dx[i]));
+  }
+  if(kernel && this->sketch_conv_) {
+    error("kernel smoothing incompatible with kernel basis");
+  }
+  if(this->walkers_mpi_) {
+    this->mpi_size_ = multi_sim_comm.Get_size();
+    this->mpi_rank_ = multi_sim_comm.Get_rank();
+  }
+
+  parse("SKETCH_UNTIL", this->sketch_until_);
+
+  parseFlag("NONINTRUSIVE", this->nonintrusive_);
+  parseFlag("DETERMINISTIC", this->deterministic_);
+
+  if(getRestart()) {
+    std::string ttfilename = "ttsketch.h5";
+    if(this->walkers_mpi_) {
+      ttfilename = "../" + ttfilename;
+    }
+    while(true) {
+      try {
+        this->vb_ = ttRead(ttfilename, ++this->sketch_count_);
+      } catch(...) {
+        --this->sketch_count_;
+        break;
+      }
+    }
+    if(this->sketch_count_ == 1) {
+      this->vb_ = MPS();
+    }
+    if(getTime() >= this->sketch_until_) {
+      this->frozen_ = true;
+    } else {
+      IFile hills_ifile;
+      if(!hills_ifile.FileExist(this->hillsfname_)) {
+        error("The hills file cannot be found");
+      }
+      hills_ifile.open(this->hillsfname_);
+      readGaussians(&hills_ifile);
+      hills_ifile.close();
+    }
+  }
+
+  if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+    this->hillsOfile_.link(*this);
+    this->hillsOfile_.enforceSuffix("");
+    this->hillsOfile_.open(this->hillsfname_);
+    if(this->fmt_.length() > 0) {
+      this->hillsOfile_.fmtField(this->fmt_);
+    }
+    hillsOfile_.setHeavyFlush();
+    for(unsigned i = 0; i < this->d_; ++i) {
+      hillsOfile_.setupPrintValue(getPntrToArgument(i));
+    }
+  }
+}
+
+void TTMetaD::readGaussians(IFile *ifile) {
+  std::vector<double> center(this->d_);
+  std::vector<double> sigma(this->d_);
+  double height;
+  int nhills = 0;
+
+  std::vector<Value> tmpvalues;
+  for(unsigned j = 0; j < this->d_; ++j) {
+    tmpvalues.push_back(Value(this, getPntrToArgument(j)->getName(), false));
+  }
+
+  while(scanOneHill(ifile, tmpvalues, center, sigma, height)) {
+    ++nhills;
+    if(this->welltemp_ && this->biasf_ > 1.0) {
+      height *= (this->biasf_ - 1.0) / this->biasf_;
+    }
+    this->hills_.push_back(Gaussian(height, center, sigma));
+  }
+  log << "  restarting from step " << this->sketch_count_ << "\n";
+  log << "  " << nhills << " hills retrieved\n";
+
+  log << "  Bibliography " << plumed.cite("Strand, Yang, Khoo, and Dinner, https://arxiv.org/abs/2603.13549 (2026)");
+  log << plumed.cite("Fishman, White, and Stoudenmire, https://arxiv.org/abs/2007.14822 (2020)");
+  log << "\n";
+}
+
+bool TTMetaD::scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height) {
+  double dummy;
+  if(ifile->scanField("time", dummy)) {
+    unsigned ncv = tmpvalues.size();
+    for(unsigned i = 0; i < ncv; ++i) {
+      ifile->scanField(&tmpvalues[i]);
+      if(tmpvalues[i].isPeriodic() && !getPntrToArgument(i)->isPeriodic()) {
+        error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
+      } else if(tmpvalues[i].isPeriodic()) {
+        std::string imin, imax;
+        tmpvalues[i].getDomain(imin, imax);
+        std::string rmin, rmax;
+        getPntrToArgument(i)->getDomain(rmin, rmax);
+        if(imin != rmin || imax != rmax) {
+          error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
+        }
+      }
+      center[i] = tmpvalues[i].get();
+    }
+    for(unsigned i = 0; i < ncv; ++i) {
+      ifile->scanField("sigma_" + getPntrToArgument(i)->getName(), sigma[i]);
+    }
+
+    ifile->scanField("height", height);
+    ifile->scanField("biasf", dummy);
+    ifile->scanField();
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void TTMetaD::writeGaussian(const Gaussian& hill, OFile&file) {
+  file.printField("time", getTimeStep() * getStep());
+  for(unsigned i = 0; i < this->d_; ++i) {
+    file.printField(getPntrToArgument(i), hill.center[i]);
+  }
+  for(unsigned i = 0; i < this->d_; ++i) {
+    file.printField("sigma_" + getPntrToArgument(i)->getName(), hill.sigma[i]);
+  }
+  double height = hill.height;
+  if(this->welltemp_ && this->biasf_ > 1.0) {
+    height *= this->biasf_ / (this->biasf_ - 1.0);
+  }
+  file.printField("height", height).printField("biasf", this->biasf_);
+  file.printField();
+}
+
+// Called every MD step: evaluate total bias (TT + remaining Gaussians) and set forces.
+void TTMetaD::calculate() {
+  std::vector<double> cv(this->d_);
+  for(unsigned i = 0; i < this->d_; ++i) {
+    cv[i] = getArgument(i);
+  }
+
+  std::vector<double> der(this->d_, 0.0);
+
+  double ene = getBiasAndDerivatives(cv, der);
+  setBias(ene);
+  for(unsigned i = 0; i < this->d_; ++i) {
+    setOutputForce(i, -der[i]);
+  }
+}
+
+// Called every MD step after forces are applied.
+// Two separate triggers:
+//   1. Every sketch_stride_ steps: run paraSketch() to compress accumulated hills into vb_,
+//      clear the hills list, and rewind the HILLS file for the next interval.
+//   2. Every stride_ steps: deposit a new Gaussian hill and write it to the HILLS file.
+void TTMetaD::update() {
+  bool nowAddATT;
+  if(getStep() % this->sketch_stride_ == 0 && !this->isFirstStep_ && !this->frozen_) {
+    nowAddATT = true;
+    if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+      this->hillsOfile_.flush();
+    }
+  } else {
+    nowAddATT = false;
+  }
+
+  if(nowAddATT) {
+    if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+      unsigned N = this->hills_.size();
+      log << "Sample limits\n";
+      for(unsigned i = 0; i < this->d_; ++i) {
+        auto [large, small] = this->sketch_basis_[i].dom();
+        for(unsigned j = 0; j < N; ++j) {
+          if(this->hills_[j].center[i] > large) {
+            large = this->hills_[j].center[i];
+          }
+          if(this->hills_[j].center[i] < small) {
+            small = this->hills_[j].center[i];
+          }
+        }
+        log << small << " " << large << "\n";
+      }
+
+      log << "\nEmpirical means:\n";
+      Matrix<double> sigmahat(this->d_, this->d_);
+      std::vector<double> muhat(this->d_, 0.0);
+      for(unsigned k = 0; k < this->d_; ++k) {
+        for(unsigned j = 0; j < N; ++j) {
+          muhat[k] += this->hills_[j].center[k] / N;
+        }
+        log << muhat[k] << " ";
+      }
+      log << "\nEmpirical covariance matrix:\n";
+      for(unsigned k = 0; k < this->d_; ++k) {
+        for(unsigned l = k; l < this->d_; ++l) {
+          sigmahat(k, l) = sigmahat(l, k) = 0.0;
+          for(unsigned j = 0; j < N; ++j) {
+            sigmahat(k, l) += (this->hills_[j].center[k] - muhat[k]) * (this->hills_[j].center[l] - muhat[l]) / (N - 1);
+          }
+          sigmahat(l, k) = sigmahat(k, l);
+        }
+      }
+      matrixOut(log, sigmahat);
+
+      // record current bias at hill centers before update, for computing relative error after
+      std::vector<double> A0(N);
+      Matrix<double> x(N, this->d_);
+      for(unsigned i = 0; i < N; ++i) {
+        for(unsigned j = 0; j < this->d_; ++j) {
+          x(i, j) = this->hills_[i].center[j];
+        }
+        A0[i] = getBias(this->hills_[i].center);
+      }
+
+      log << "\nStarting TT-sketch...\n";
+      log.flush();
+      paraSketch();
+      ++this->sketch_count_;
+
+      this->hills_.clear();
+
+      // compute relative L2 approximation error at hill centers: ||V_new - V_old|| / ||V_old||
+      std::vector<double> diff(N);
+      for(unsigned i = 0; i < N; ++i) {
+        std::vector<double> xi(this->d_);
+        for(unsigned j = 0; j < this->d_; ++j) {
+          xi[j] = x(i, j);
+        }
+        diff[i] = getBias(xi);
+      }
+      std::transform(diff.begin(), diff.end(), A0.begin(), diff.begin(), std::minus<double>());
+      log << "Relative l2 error = " << sqrt(norm(diff) / norm(A0)) << "\n\n";
+      log.flush();
+
+      std::string ttfilename = "ttsketch.h5";
+      if(this->walkers_mpi_) {
+        ttfilename = "../" + ttfilename;
+      }
+      ttWrite(ttfilename, this->vb_, this->sketch_count_);
+    }
+
+    if(this->walkers_mpi_) {
+      multi_sim_comm.Bcast(this->sketch_count_, 0);
+      if(this->mpi_rank_ != 0) {
+        this->hills_.clear();
+        this->vb_ = ttRead("../ttsketch.h5", this->sketch_count_);
+      }
+    }
+    if(getTime() >= this->sketch_until_) {
+      this->frozen_ = true;
+    } else if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+      this->hillsOfile_.rewind();
+      this->hillsOfile_.clearFields();
+      if(this->fmt_.length() > 0) {
+        this->hillsOfile_.fmtField(this->fmt_);
+      }
+      hillsOfile_.setHeavyFlush();
+      for(unsigned i = 0; i < this->d_; ++i) {
+        hillsOfile_.setupPrintValue(getPntrToArgument(i));
+      }
+    }
+  }
+
+  bool nowAddAHill;
+  if(getStep() % this->stride_ == 0 && !isFirstStep_ && !this->frozen_) {
+    nowAddAHill = true;
+  } else {
+    nowAddAHill = false;
+    this->isFirstStep_ = false;
+  }
+
+  std::vector<double> cv(this->d_);
+  for(unsigned i = 0; i < this->d_; ++i) {
+    cv[i] = getArgument(i);
+  }
+
+  if(nowAddAHill) {
+    double height = getHeight(cv);
+
+    if(this->walkers_mpi_) {
+      std::vector<double> all_cv(this->mpi_size_ * this->d_, 0.0);
+      std::vector<double> all_sigma(this->mpi_size_ * this->sigma0_.size(), 0.0);
+      std::vector<double> all_height(this->mpi_size_, 0.0);
+      multi_sim_comm.Allgather(cv, all_cv);
+      multi_sim_comm.Allgather(this->sigma0_, all_sigma);
+      multi_sim_comm.Allgather(height * (this->biasf_ > 1.0 ? this->biasf_ / (this->biasf_ - 1.0) : 1.0), all_height);
+
+      for(int i = 0; i < this->mpi_size_; i++) {
+        std::vector<double> cv_now(this->d_);
+        std::vector<double> sigma_now(this->sigma0_.size());
+        for(unsigned j = 0; j < this->d_; j++) {
+          cv_now[j] = all_cv[i * this->d_ + j];
+        }
+        for(unsigned j = 0; j < this->sigma0_.size(); j++) {
+          sigma_now[j] = all_sigma[i * this->sigma0_.size() + j];
+        }
+        double fact = (this->biasf_ > 1.0 ? (this->biasf_ - 1.0) / this->biasf_ : 1.0);
+        Gaussian newhill(all_height[i] * fact, cv_now, sigma_now);
+        this->hills_.push_back(newhill);
+        if(this->mpi_rank_ == 0) {
+          writeGaussian(newhill, hillsOfile_);
+        }
+      }
+    } else {
+      Gaussian newhill(height, cv, this->sigma0_);
+      this->hills_.push_back(newhill);
+      writeGaussian(newhill, hillsOfile_);
+    }
+  }
+
+  if(getStep() % this->sketch_stride_ == 1 && !this->frozen_) {
+    log << "Vbias update " << this->sketch_count_ << "...\n\n";
+    log.flush();
+  }
+}
+
+double TTMetaD::getHeight(const std::vector<double>& cv) {
+  double height = this->height0_;
+  if(this->welltemp_) {
+    double vbias = getBias(cv);
+    if(this->biasf_ > 1.0) {
+      height = this->height0_ * exp(-vbias / (this->kbt_ * (this->biasf_ - 1.0)));
+    } else {
+      height = this->height0_ * exp(-vbias / this->kbt_);
+    }
+  }
+  return height;
+}
+
+// Total bias = TT approximation of accumulated hills + sum of recently deposited Gaussians.
+double TTMetaD::getBias(const std::vector<double>& cv) {
+  double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
+  unsigned nt = OpenMP::getNumThreads();
+  #pragma omp parallel num_threads(nt)
+  {
+    #pragma omp for reduction(+:bias) nowait
+    for(unsigned i = 0; i < hills_.size(); ++i) {
+      bias += evaluateGaussian(cv, this->hills_[i]);
+    }
+  }
+  return bias;
+}
+
+double TTMetaD::getBiasAndDerivatives(const std::vector<double>& cv, std::vector<double>& der) {
+  double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
+  if(length(this->vb_) != 0) {
+    der = ttGrad(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
+  }
+  unsigned nt = OpenMP::getNumThreads();
+  if(this->hills_.size() < 2 * nt || nt == 1) {
+    std::vector<double> dp(this->d_);
+    for(unsigned i = 0; i < this->hills_.size(); ++i) {
+      bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], der, dp);
+    }
+  } else {
+    #pragma omp parallel num_threads(nt)
+    {
+      std::vector<double> omp_deriv(this->d_, 0.0);
+      std::vector<double> dp(this->d_);
+      #pragma omp for reduction(+:bias) nowait
+      for(unsigned i = 0; i < this->hills_.size(); ++i) {
+        bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], omp_deriv, dp);
+      }
+      #pragma omp critical
+      for(unsigned i = 0; i < this->d_; ++i) {
+        der[i] += omp_deriv[i];
+      }
+    }
+  }
+  return bias;
+}
+
+double TTMetaD::evaluateGaussian(const std::vector<double>& cv, const Gaussian& hill) {
+  double dp2 = 0.0;
+  for(unsigned i = 0; i < this->d_; i++) {
+    double dp = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
+    dp2 += dp * dp;
+  }
+  dp2 *= 0.5;
+
+  double bias = 0.0;
+  if(dp2 < dp2cutoff) {
+    bias = hill.height * exp(-dp2);
+  }
+
+  return bias;
+}
+
+double TTMetaD::evaluateGaussianAndDerivatives(const std::vector<double>& cv, const Gaussian& hill, std::vector<double>& der, std::vector<double>& dp) {
+  double dp2 = 0.0;
+  double bias = 0.0;
+  for(unsigned i = 0; i < this->d_; i++) {
+    dp[i] = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
+    dp2 += dp[i] * dp[i];
+  }
+  dp2 *= 0.5;
+  if(dp2 < dp2cutoff) {
+    bias = hill.height * exp(-dp2);
+    for(unsigned i = 0; i < this->d_; i++) {
+      der[i] -= bias * dp[i] * hill.invsigma[i];
+    }
+  }
+
+  return bias;
+}
+
+// TT-Sketch algorithm: compresses the N accumulated Gaussian hills plus the previous
+// TT bias vb_ into a new low-rank TT approximation G, which becomes the updated vb_.
+//
+// Overview (see paper Algorithm 2):
+//   1. createTTCoeff(): build a random sketch coefficient TT `coeff` of rank sketch_rc_.
+//   2. intBasisSample(): compute M[i][j,k] = <phi_k, g_j>_{dim i}, the inner product of
+//      each basis function with each Gaussian along dimension i.
+//   3. formTensorMoment(): contract coeff with M to get the tensor moment Bemp and
+//      left/right environment tensors envi_L, envi_R needed to form the normal equations.
+//   4. For each bond between cores k and k+1:
+//      - Form the cross-gram matrix A from envi_L and envi_R.
+//      - SVD A to get the rank-trimmed projection V (truncating to sketch_r_ or sketch_cutoff_).
+//   5. Recover TT cores G from Bemp and the pseudo-inverse of U*S.
+//   6. Apply Gram matrix correction (kernel basis only) to convert to dual basis coefficients.
+void TTMetaD::paraSketch() {
+  unsigned N = this->hills_.size();
+  auto coeff = createTTCoeff();
+  auto [M, is] = intBasisSample(siteInds(coeff));
+  MPS G(this->d_);
+
+  auto [Bemp, envi_L, envi_R] = formTensorMoment(M, coeff, is);
+  MPS Bemp_Vb;
+  std::vector<ITensor> envi_L_Vb;
+  std::vector<ITensor> envi_R_Vb;
+  if(this->sketch_count_ != 1) {
+    if(this->nonintrusive_) {
+      // Non-intrusive variant: accumulate tensor moments directly across sketches.
+      // B_prev_ stores the sum of Bemp from all prior intervals; re-index its bond/site
+      // indices to match the current coeff TT's indices before adding.
+      auto sites = siteInds(Bemp);
+      auto sites_prev = siteInds(this->B_prev_);
+      auto links = linkInds(Bemp);
+      auto links_prev = linkInds(this->B_prev_);
+      for(unsigned i = 1; i <= this->d_; ++i) {
+        this->B_prev_.ref(i) *= delta(sites(i), sites_prev(i));
+        if(i != 1) {
+          this->B_prev_.ref(i) *= delta(links(i - 1), links_prev(i - 1));
+        }
+        if (i != this->d_) {
+          this->B_prev_.ref(i) *= delta(links(i), links_prev(i));
+        }
+        Bemp.ref(i) += this->B_prev_(i);
+      }
+    } else {
+      // Intrusive variant: project the previous TT bias vb_ onto the current sketch's
+      // basis by pre-multiplying each core by the Gram matrix (kernel basis) or the
+      // convolution inner-product matrix (Fourier conv mode), then add its tensor moment.
+      if(this->sketch_basis_[0].kernel()) {
+        // Convert vb_ from dual basis to standard basis by applying Gram matrix per core
+        for(unsigned i = 1; i <= this->d_; ++i) {
+          auto s = siteIndex(this->vb_, i);
+          ITensor gram(s, prime(s));
+          for(int j = 1; j <= dim(s); ++j) {
+            for(int l = 1; l <= dim(s); ++l) {
+              gram.set(s = j, prime(s) = l, this->sketch_basis_[i - 1].gram()(j - 1, l - 1));
+            }
+          }
+          this->vb_.ref(i) *= gram;
+          this->vb_.ref(i).noPrime();
+        }
+      }
+      if(this->sketch_conv_) {
+        // Apply Fourier convolution damping factors to vb_ to account for basis smoothing
+        for(unsigned i = 1; i <= this->d_; ++i) {
+          double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
+          double w = this->sketch_basis_[i - 1].w();
+          auto s = siteIndex(this->vb_, i);
+          ITensor inner(s, prime(s));
+          for(int j = 1; j < dim(s); ++j) {
+            inner.set(s = j, prime(s) = j, exp(-pow(M_PI * w * (j / 2), 2) / (2 * pow(L, 2))));
+          }
+          this->vb_.ref(i) *= inner;
+          this->vb_.ref(i).noPrime();
+        }
+      }
+      auto vbresult = formTensorMomentVb(coeff);
+      Bemp_Vb = std::get<0>(vbresult);
+      envi_L_Vb = std::get<1>(vbresult);
+      envi_R_Vb = std::get<2>(vbresult);
+      for(unsigned i = 1; i <= this->d_; ++i) {
+        Bemp.ref(i) += Bemp_Vb(i);
+      }
+    }
+  }
+  if(this->nonintrusive_) {
+    this->B_prev_ = Bemp;
+    if(this->sketch_count_ == 1) {
+      this->A_prev_.resize(this->d_ - 1);
+    }
+  }
+  // For each bond between cores k-1 and k, form the cross-gram matrix A = L^T * R,
+  // where L = envi_L[k] and R = envi_R[k-2] are the left/right environment projections
+  // of coeff onto the sample indices. SVD A to find the rank-trimmed subspace V.
+  auto links = linkInds(coeff);
+  std::vector<ITensor> U(this->d_), S(this->d_), V(this->d_);
+  std::vector<Index> links_trimmed;
+  for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
+    int rank = dim(links(core_id - 1));
+    Matrix<double> LMat(N, rank), RMat(N, rank);
+    for(unsigned i = 1; i <= N; ++i) {
+      for(int j = 1; j <= rank; ++j) {
+        LMat(i - 1, j - 1) = envi_L[core_id - 1].elt(is(core_id) = i, links(core_id - 1) = j);
+        RMat(i - 1, j - 1) = envi_R[core_id - 2].elt(is(core_id - 1) = i, links(core_id - 1) = j);
+      }
+    }
+    Matrix<double> Lt, AMat, PMat, AMat_Vb;
+    transpose(LMat, Lt);
+    mult(Lt, RMat, AMat);
+
+    if(this->sketch_count_ != 1 && !this->nonintrusive_) {
+      auto ivb = linkIndex(this->vb_, core_id - 1);
+      int rank_vb = dim(ivb);
+      LMat = Matrix<double>(rank_vb, rank);
+      RMat = Matrix<double>(rank_vb, rank);
+      for(int i = 1; i <= rank_vb; ++i) {
+        for(int j = 1; j <= rank; ++j) {
+          LMat(i - 1, j - 1) = envi_L_Vb[core_id - 1].elt(ivb = i, links(core_id - 1) = j);
+          RMat(i - 1, j - 1) = envi_R_Vb[core_id - 2].elt(ivb = i, links(core_id - 1) = j);
+        }
+      }
+      transpose(LMat, Lt);
+      mult(Lt, RMat, AMat_Vb);
+    }
+
+    ITensor A(prime(links(core_id - 1)), links(core_id - 1));
+    for(int i = 1; i <= rank; ++i) {
+      for(int j = 1; j <= rank; ++j) {
+        A.set(prime(links(core_id - 1)) = i, links(core_id - 1) = j, AMat(i - 1, j - 1));
+      }
+    }
+    if(this->sketch_count_ != 1) {
+      if(this->nonintrusive_) {
+        this->A_prev_[core_id - 2] *= delta(this->A_prev_[core_id - 2].index(1), A.index(1));
+        this->A_prev_[core_id - 2] *= delta(this->A_prev_[core_id - 2].index(2), A.index(2));
+        A += this->A_prev_[core_id - 2];
+      } else {
+        ITensor A_Vb(prime(links(core_id - 1)), links(core_id - 1));
+        for(int i = 1; i <= rank; ++i) {
+          for(int j = 1; j <= rank; ++j) {
+            A_Vb.set(prime(links(core_id - 1)) = i, links(core_id - 1) = j, AMat_Vb(i - 1, j - 1));
+          }
+        }
+        A += A_Vb;
+      }
+    }
+    if(this->nonintrusive_) {
+      this->A_prev_[core_id - 2] = A;
+    }
+    auto original_link_tags = tags(links(core_id - 1));
+    V[core_id - 1] = ITensor(links(core_id - 1));
+    if(this->sketch_r_ > 0) {
+      svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1],
+      {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags, "MaxDim=", this->sketch_r_});
+    } else {
+      svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1], {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags});
+    }
+    links_trimmed.push_back(commonIndex(S[core_id - 1], V[core_id - 1]));
+  }
+
+  // Recover TT cores G from Bemp and the trimmed subspaces.
+  // G[1] = Bemp[1] * V[1] (project first core onto rank-trimmed subspace)
+  // G[k] = pinv(U[k]*S[k]) * Bemp[k] * V[k]  for 2 <= k < d
+  // G[d] = pinv(U[d]*S[d]) * Bemp[d]
+  G.ref(1) = Bemp(1) * V[1];
+  for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
+    int rank = dim(links(core_id - 1)), rank_trimmed = dim(links_trimmed[core_id - 2]);
+    ITensor A = U[core_id - 1] * S[core_id - 1];
+    ITensor Pinv(links_trimmed[core_id - 2], links(core_id - 1));
+    Matrix<double> AMat(rank, rank_trimmed), PMat;
+    for(int i = 1; i <= rank; ++i) {
+      for(int j = 1; j <= rank_trimmed; ++j) {
+        AMat(i - 1, j - 1) = A.elt(prime(links(core_id - 1)) = i, links_trimmed[core_id - 2] = j);
+      }
+    }
+    pseudoInvert(AMat, PMat);
+
+    for(int i = 1; i <= rank_trimmed; ++i) {
+      for(int j = 1; j <= rank; ++j) {
+        Pinv.set(links_trimmed[core_id - 2] = i, links(core_id - 1) = j, PMat(i - 1, j - 1));
+      }
+    }
+    G.ref(core_id) = Pinv * Bemp(core_id);
+    if(core_id != this->d_) {
+      G.ref(core_id) *= V[core_id];
+    }
+  }
+
+  log << "Final ranks ";
+  for(unsigned i = 1; i < this->d_; ++i) {
+    log << dim(linkIndex(G, i)) << " ";
+  }
+  log << "\n";
+  log.flush();
+
+  if(this->sketch_basis_[0].kernel()) {
+    // Apply pseudo-inverse of the Gram matrix to each core to convert from the
+    // primal Gram representation back to dual basis coefficients (G^+ * G_core).
+    for(unsigned i = 1; i <= this->d_; ++i) {
+      auto s = siteIndex(G, i);
+      ITensor ginv(s, prime(s));
+      for(int j = 1; j <= dim(s); ++j) {
+        for(int l = 1; l <= dim(s); ++l) {
+          ginv.set(s = j, prime(s) = l, this->sketch_basis_[i - 1].ginv()(j - 1, l - 1));
+        }
+      }
+      G.ref(i) *= ginv;
+      G.ref(i).noPrime();
+    }
+  }
+
+  this->vb_ = G;
+}
+
+// Build the random sketch coefficient TT `coeff` of bond dimension sketch_rc_.
+// Each element is drawn from N(0,1); then each core is multiplied by a diagonal
+// matrix diag(1, alpha, alpha, ...) so the constant basis function (pos=1) has
+// full weight while all other harmonics are scaled down by sketch_alpha_. This
+// ensures the sketch captures the large constant component of the bias accurately.
+MPS TTMetaD::createTTCoeff() const {
+  std::default_random_engine generator(this->deterministic_ ? 42u : static_cast<unsigned int>(time(nullptr)));
+  std::normal_distribution<double> distribution(0.0, 1.0);
+  int n = this->sketch_basis_[0].nbasis();
+  auto sites = SiteSet(this->d_, n);
+  auto coeff = MPS(sites, this->sketch_rc_);
+  for(int j = 1; j <= n; ++j) {
+    for(int k = 1; k <= this->sketch_rc_; ++k) {
+      coeff.ref(1).set(sites(1) = j, linkIndex(coeff, 1) = k, distribution(generator));
+    }
+  }
+  for(unsigned i = 2; i <= this->d_ - 1; ++i) {
+    for(int j = 1; j <= n; ++j) {
+      for(int k = 1; k <= this->sketch_rc_; ++k) {
+        for(int l = 1; l <= this->sketch_rc_; ++l) {
+          coeff.ref(i).set(sites(i) = j, linkIndex(coeff, i - 1) = k, linkIndex(coeff, i) = l, distribution(generator));
+        }
+      }
+    }
+  }
+  for(int j = 1; j <= n; ++j) {
+    for(int k = 1; k <= this->sketch_rc_; ++k) {
+      coeff.ref(this->d_).set(sites(this->d_) = j, linkIndex(coeff, this->d_ - 1) = k, distribution(generator));
+    }
+  }
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    auto s = sites(i);
+    auto sp = prime(s);
+    std::vector<double> Avec(n, this->sketch_alpha_);
+    Avec[0] = 1.0;
+    auto A = diagITensor(Avec, s, sp);
+    coeff.ref(i) *= A;
+    coeff.ref(i).noPrime();
+  }
+  return coeff;
+}
+
+// Compute the matrix M[dim i][sample j, basis k] = <phi_k, g_j>_{dim i}, the
+// analytical inner product of each 1D basis function with the 1D marginal of each
+// Gaussian hill along dimension i. The d-dimensional Gaussian factorizes as a
+// product of d 1D Gaussians, so we distribute height as h^(1/d) per dimension.
+// These integrals are computed in closed form:
+//   - Kernel basis: convolution of the kernel function with the 1D Gaussian hill
+//   - Fourier basis: Fourier transform of the 1D Gaussian (exponential decay * harmonic)
+// Returns M as a vector of ITensors (one per dimension) and a new IndexSet is_new
+// where is_new(i) indexes the N sample points for dimension i.
+std::pair<std::vector<ITensor>, IndexSet> TTMetaD::intBasisSample(const IndexSet& is) const {
+  unsigned N = this->hills_.size();
+  int nb = this->sketch_basis_[0].nbasis();
+  auto sites_new = SiteSet(this->d_, N);
+  std::vector<ITensor> M;
+  std::vector<Index> is_new;
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
+    double a = (this->sketch_basis_[i - 1].dom().second + this->sketch_basis_[i - 1].dom().first) / 2;
+    M.push_back(ITensor(sites_new(i), is(i)));
+    is_new.push_back(sites_new(i));
+    for(unsigned j = 1; j <= N; ++j) {
+      double x = this->hills_[j - 1].center[i - 1];
+      double w = this->hills_[j - 1].sigma[i - 1];
+      double h = pow(this->hills_[j - 1].height, 1.0 / this->d_);  // per-dimension d-th root of height
+      for(int pos = 1; pos <= nb; ++pos) {
+        double result = 0.0;
+        if(this->sketch_basis_[0].kernel()) {
+          if(pos == 1) {
+            // <1, g_j> = integral of 1D Gaussian = sqrt(2*pi)*sigma
+            result = h * sqrt(2 * M_PI) * w;
+          } else {
+            // <kernel_c, g_j> = conv of two Gaussians (widths dx and w), with periodic images
+            double c = this->sketch_basis_[i - 1].center(pos - 1);
+            double dx = this->sketch_basis_[i - 1].dx();
+            for(int k = -1; k <= 1; ++k) {
+              result += exp(-pow(x - c + 2 * k * L, 2) / (2 * (pow(dx, 2) + pow(w, 2)))) * h * sqrt(2 * M_PI) * w /
+                        (sqrt(1 / pow(dx, 2) + 1 / pow(w, 2)) * w);
+            }
+          }
+        } else {
+          if(pos == 1) {
+            // <1/sqrt(2L), g_j> = sqrt(pi/L) * sigma (Gaussian times constant)
+            result = h * sqrt(M_PI / L) * w;
+          } else if(pos % 2 == 0) {
+            // <cos harmonic, g_j>: Fourier transform of Gaussian times exponential damping
+            result = exp(-pow(M_PI * w * (pos / 2), 2) / (2 * pow(L, 2))) * h * sqrt(2 * M_PI / L) * w * cos(M_PI * (x - a) * (pos / 2) / L);
+          } else {
+            result = exp(-pow(M_PI * w * (pos / 2), 2) / (2 * pow(L, 2))) * h * sqrt(2 * M_PI / L) * w * sin(M_PI * (x - a) * (pos / 2) / L);
+          }
+        }
+        M.back().set(sites_new(i) = j, is(i) = pos, result);
+      }
+    }
+  }
+  return std::make_pair(M, IndexSet(is_new));
+}
+
+// Compute the tensor moment B and environment tensors for the new Gaussian hills.
+//
+// First, L = coeff with each core's basis index replaced by the sample index via M:
+//   L[i] = coeff[i] * M[i], so L has (sample_index, bond_left, bond_right) indices.
+//
+// envi_L[i] = partial contraction of L(1)...L(i) summed over sample indices 1..N,
+//   yielding shape (sample_index_{i+1}, link_i). Used to form the left half of A.
+// envi_R[i] = partial contraction of L(i+2)...L(d) summed over sample indices,
+//   yielding shape (sample_index_{i+1}, link_{i+1}). Used to form the right half of A.
+//
+// B[k] = sum_j envi_L[k-1][j, :] * envi_R[k-1][j, :] * M[k][:, basis_k],
+//   which is the "tensor moment" for core k: the sketch of the Gaussian sum projected
+//   onto the basis functions of dimension k and the sketch subspaces of all other dims.
+std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorMoment(const std::vector<ITensor>& M, const MPS& coeff, const IndexSet& is) {
+  int N = dim(is(1));
+  auto links = linkInds(coeff);
+  // L[i] = coeff[i] with basis index contracted against M[i] -> sample index
+  auto L = coeff;
+
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    L.ref(i) *= M[i - 1];
+  }
+
+  // Build left environments: envi_L[i] accumulates the contraction of L(1)..L(i)
+  // over the shared sample index, leaving the next sample index and link free.
+  std::vector<ITensor> envi_L(this->d_);
+  envi_L[1] = L(1) * delta(is(1), is(2));
+  for(unsigned i = 2; i < this->d_; ++i) {
+    int rankl = dim(links(i - 1));
+    int rankr = dim(links(i));
+    envi_L[i] = ITensor(is(i + 1), links(i));
+    for(int j = 1; j <= N; ++j) {
+      for(int k = 1; k <= rankr; ++k) {
+        ITensor LHS(links(i - 1)), RHS(links(i - 1));
+        for(int ii = 1; ii <= rankl; ++ii) {
+          LHS.set(links(i - 1) = ii, envi_L[i - 1].elt(is(i) = j, links(i - 1) = ii));
+          RHS.set(links(i - 1) = ii, L(i).elt(links(i - 1) = ii, is(i) = j, links(i) = k));
+        }
+        envi_L[i].set(is(i + 1) = j, links(i) = k, elt(LHS * RHS));
+      }
+    }
+  }
+
+  // Build right environments: envi_R[i] accumulates L(i+2)..L(d) right-to-left.
+  std::vector<ITensor> envi_R(this->d_);
+  envi_R[this->d_ - 2] = L(this->d_) * delta(is(this->d_), is(this->d_ - 1));
+  for(int i = this->d_ - 3; i >= 0; --i) {
+    int rankl = dim(links(i + 1));
+    int rankr = dim(links(i + 2));
+    envi_R[i] = ITensor(is(i + 1), links(i + 1));
+    for(int j = 1; j <= N; ++j) {
+      for(int k = 1; k <= rankl; ++k) {
+        ITensor LHS(links(i + 2)), RHS(links(i + 2));
+        for(int ii = 1; ii <= rankr; ++ii) {
+          LHS.set(links(i + 2) = ii, envi_R[i + 1].elt(is(i + 2) = j, links(i + 2) = ii));
+          RHS.set(links(i + 2) = ii, L(i + 2).elt(links(i + 2) = ii, is(i + 2) = j, links(i + 1) = k));
+        }
+        envi_R[i].set(is(i + 1) = j, links(i + 1) = k, elt(LHS * RHS));
+      }
+    }
+  }
+
+  // Assemble tensor moment B: for each core, combine the left and right environments
+  // element-wise over sample indices, then contract with M to restore basis indices.
+  MPS B(this->d_);
+  B.ref(1) = envi_R[0] * M[0];
+  for(unsigned core_id = 2; core_id < this->d_; ++core_id) {
+    int rankl = dim(links(core_id - 1));
+    int rankr = dim(links(core_id));
+    B.ref(core_id) = ITensor(links(core_id - 1), is(core_id), links(core_id));
+    for(int i = 1; i <= rankl; ++i) {
+      for(int j = 1; j <= rankr; ++j) {
+        for(int k = 1; k <= N; ++k) {
+          double Lelt = envi_L[core_id - 1].elt(is(core_id) = k, links(core_id - 1) = i);
+          double Relt = envi_R[core_id - 1].elt(is(core_id) = k, links(core_id) = j);
+          B.ref(core_id).set(links(core_id - 1) = i, is(core_id) = k, links(core_id) = j, Lelt * Relt);
+        }
+      }
+    }
+    B.ref(core_id) *= M[core_id - 1];
+  }
+  B.ref(this->d_) = envi_L[this->d_ - 1] * M[this->d_ - 1];
+
+  return std::make_tuple(B, envi_L, envi_R);
+}
+
+// Compute the tensor moment B and environments for the previous TT bias vb_.
+// This is the intrusive variant's contribution from the prior accumulated bias.
+// Analogous to formTensorMoment but uses vb_ in place of M*coeff: the "samples"
+// are the vb_ bond indices, and the environments are formed by contracting coeff
+// with vb_ core by core. The result has the same structure as Bemp from the new hills.
+std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorMomentVb(const MPS& coeff) {
+  // align vb_ site indices with coeff's site indices for contraction
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    this->vb_.ref(i) *= delta(siteIndex(this->vb_, i), siteIndex(coeff, i));
+  }
+  std::vector<ITensor> envi_L(this->d_);
+  envi_L[1] = coeff(1) * this->vb_(1);
+  for(unsigned i = 2; i < this->d_; ++i) {
+    envi_L[i] = envi_L[i - 1] * coeff(i) * this->vb_(i);
+  }
+
+  std::vector<ITensor> envi_R(this->d_);
+  envi_R[this->d_ - 2] = coeff(this->d_) * this->vb_(this->d_);
+  for(int i = this->d_ - 3; i >= 0; --i) {
+    envi_R[i] = envi_R[i + 1] * coeff(i + 2) * this->vb_(i + 2);
+  }
+
+  MPS B(this->d_);
+  B.ref(1) = this->vb_(1) * envi_R[0];
+  for(unsigned core_id = 2; core_id < this->d_; ++core_id) {
+    B.ref(core_id) = envi_L[core_id - 1] * this->vb_(core_id) * envi_R[core_id - 1];
+  }
+  B.ref(this->d_) = envi_L[this->d_ - 1] * this->vb_(this->d_);
+
+  return std::make_tuple(B, envi_L, envi_R);
+}
+
+}
+}
+#endif // defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
diff --git a/src/ttsketch/module.md b/src/ttsketch/module.md
index 8d4f6951e7..ced3c2ebe5 100644
--- a/src/ttsketch/module.md
+++ b/src/ttsketch/module.md
@@ -39,7 +39,7 @@ Once ITensor is built, configure PLUMED from its source directory. Adjust
 library paths and flags to match your `options.mk`:
 
 ```bash
-./configure --enable-modules=ttsketch \
+./configure --enable-modules=ttsketch --enable-libitensor \
     LDFLAGS="-L/path/to/ITensor/lib" \
     CPPFLAGS="-I/path/to/ITensor -DITENSOR_USE_HDF5 -DITENSOR_USE_OMP -D__ITENSOR_LAPACK_WRAP_h" \
     LIBS="-litensor -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lhdf5 -lhdf5_hl" \

From 2ebc7de8991ecdf83efec9b94b84f16557611d2c Mon Sep 17 00:00:00 2001
From: Nils Strand <nstrand@midway3-login3.rcc.local>
Date: Mon, 11 May 2026 16:28:17 -0500
Subject: [PATCH 21/26] fix attempt-codecheck, doc, nvhpc

---
 configure                 |   50 +-
 configure.ac              |    5 +-
 src/ttsketch/TTHelper.cpp |    6 +-
 src/ttsketch/TTHelper.h   |    6 +-
 src/ttsketch/TTMetaD.cpp  | 2225 +++++++++++++++++++------------------
 5 files changed, 1193 insertions(+), 1099 deletions(-)

diff --git a/configure b/configure
index da78d1145a..fed871bf96 100755
--- a/configure
+++ b/configure
@@ -10555,7 +10555,7 @@ fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
   save_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -fopenmp"
+  CXXFLAGS="$CXXFLAGS -fconcepts -fopenmp"
 
     found=ko
     __PLUMED_HAS_LIBITENSOR=no
@@ -10681,6 +10681,54 @@ $as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_LIBITENSOR" >&2;}
     fi
 
   CXXFLAGS="$save_CXXFLAGS"
+  if test "$__PLUMED_HAS_LIBITENSOR" = yes ; then
+
+  save_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -fconcepts"
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -fconcepts" >&5
+$as_echo_n "checking whether $CXX accepts -fconcepts... " >&6; }
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: not linking" >&5
+$as_echo "not linking" >&6; }; CXXFLAGS="$save_CXXFLAGS"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }; CXXFLAGS="$save_CXXFLAGS"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+  fi
 fi
 
 acc_found=ko
diff --git a/configure.ac b/configure.ac
index 80ac930fcc..637bb9c2be 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1055,7 +1055,7 @@ if test $libitensor = true ; then
   # everywhere, this PLUMED_CHECK_CXXFLAG call can be removed.
   PLUMED_CHECK_CXXFLAG([-fconcepts])
   save_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -fopenmp"
+  CXXFLAGS="$CXXFLAGS -fconcepts -fopenmp"
   PLUMED_CHECK_CXX_PACKAGE([libitensor],[
     #include <itensor/all.h>
     int main() {
@@ -1066,6 +1066,9 @@ if test $libitensor = true ; then
     }
   ], [__PLUMED_HAS_LIBITENSOR], [ itensor openblas ], [true])
   CXXFLAGS="$save_CXXFLAGS"
+  if test "$__PLUMED_HAS_LIBITENSOR" = yes ; then
+    PLUMED_CHECK_CXXFLAG([-fconcepts])
+  fi
 fi
 
 acc_found=ko
diff --git a/src/ttsketch/TTHelper.cpp b/src/ttsketch/TTHelper.cpp
index 27b335e166..8d51193ea5 100644
--- a/src/ttsketch/TTHelper.cpp
+++ b/src/ttsketch/TTHelper.cpp
@@ -4,7 +4,8 @@
    See the COPYRIGHT file distributed with this software for license details.
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
 #include "TTHelper.h"
-#if defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
+#ifdef __PLUMED_HAS_LIBITENSOR
+#ifdef __PLUMED_HAS_LIBHDF5
 
 using namespace itensor;
 
@@ -182,4 +183,5 @@ void marginal2d(const MPS& tt, const std::vector<BasisFunc>& basis, int pos1, in
 
 }
 }
-#endif // defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
+#endif // __PLUMED_HAS_LIBHDF5
+#endif // __PLUMED_HAS_LIBITENSOR
diff --git a/src/ttsketch/TTHelper.h b/src/ttsketch/TTHelper.h
index 7168102ed7..a05007e1fb 100644
--- a/src/ttsketch/TTHelper.h
+++ b/src/ttsketch/TTHelper.h
@@ -2,7 +2,8 @@
 #define __PLUMED_ttsketch_TTHelper_h
 #include "BasisFunc.h"
 #include "tools/Matrix.h"
-#if defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
+#ifdef __PLUMED_HAS_LIBITENSOR
+#ifdef __PLUMED_HAS_LIBHDF5
 #include <itensor/all.h>
 
 namespace PLMD {
@@ -36,5 +37,6 @@ void marginal2d(const itensor::MPS& tt, const std::vector<BasisFunc>& basis, int
 
 }
 }
-#endif // defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
+#endif // __PLUMED_HAS_LIBHDF5
+#endif // __PLUMED_HAS_LIBITENSOR
 #endif
diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index 4d043f249c..367eef81f3 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -1,1093 +1,1132 @@
-/* Copyright (c) 2026, Nils E. Strand, Siyao Yang, Yuehaw Khoo, and Aaron R. Dinner
- *
- * Tensor train (TT)-metadynamics.
- * https://arxiv.org/abs/2603.13549
- * See module.md for installation instructions.
- */
-
-#include "TTHelper.h"
-#if defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
-#include "bias/Bias.h"
-#include "core/ActionRegister.h"
-#include "core/ActionSet.h"
-#include "core/PlumedMain.h"
-#include "tools/Exception.h"
-#include "tools/Communicator.h"
-#include "tools/File.h"
-#include "tools/OpenMP.h"
-
-using namespace itensor;
-using namespace PLMD::bias;
-
-namespace PLMD {
-namespace ttsketch {
-
-class TTMetaD : public Bias {
-
-private:
-  struct Gaussian {
-    double height;
-    std::vector<double> center;
-    std::vector<double> sigma;
-    std::vector<double> invsigma;
-    Gaussian(const double h, const std::vector<double>& c, const std::vector<double>& s)
-      : height(h), center(c), sigma(s), invsigma(s) {
-      for(unsigned i = 0; i < invsigma.size(); ++i) {
-        if(abs(invsigma[i]) > 1.e-20) {
-          invsigma[i] = 1.0 / invsigma[i];
-        } else {
-          invsigma[i] = 0.0;
-        }
-      }
-    }
-  };
-  // standard metadynamics parameters
-  double kbt_;
-  int stride_;
-  bool welltemp_;
-  double biasf_;
-  std::string fmt_;
-  bool isFirstStep_;
-  double height0_;
-  std::vector<double> sigma0_;
-  std::vector<Gaussian> hills_;   // Gaussians deposited since the last TT update
-  OFile hillsOfile_;
-  std::string mw_dir_;
-  std::string hillsfname_;
-  bool walkers_mpi_;
-  int mpi_size_;
-  int mpi_rank_;
-  // TT-sketch parameters
-  unsigned d_;               // number of CV dimensions
-  int sketch_rc_;            // initial (sketch) rank for the random TT coefficient tensor
-  int sketch_r_;             // target TT bond dimension after SVD truncation (0 = use cutoff)
-  double sketch_cutoff_;     // relative SVD truncation threshold (used when sketch_r_=0)
-  int sketch_stride_;        // steps between TT bias updates (tau in the paper)
-  double sketch_alpha_;      // weight for non-constant basis functions in the sketch coefficient TT
-  std::vector<BasisFunc> sketch_basis_;  // one BasisFunc per CV dimension
-  unsigned sketch_count_;    // number of TT updates performed so far (starts at 1, increments after each update)
-  MPS vb_;                   // current TT approximation of the accumulated bias (V_bias^TT)
-  double sketch_until_;      // simulation time after which the bias is frozen (no further updates)
-  bool frozen_;              // true once the bias has been frozen (after sketch_until_)
-  bool sketch_conv_;         // true if Gaussian smoothing (conv mode) is active for basis evaluation
-  bool nonintrusive_;        // if true, uses the intrusive-free variant: accumulates Bemp directly
-  // instead of re-projecting the previous vb_ through the Gram matrix
-  bool deterministic_;       // if true, use a fixed RNG seed (42) in createTTCoeff() for reproducibility
-  MPS B_prev_;               // accumulated tensor moment from all previous sketches (nonintrusive only)
-  std::vector<ITensor> A_prev_;   // accumulated cross-gram matrices per bond (nonintrusive only)
-
-  void readGaussians(IFile *ifile);
-  void writeGaussian(const Gaussian& hill, OFile& file);
-  double getHeight(const std::vector<double>& cv);
-  double getBias(const std::vector<double>& cv);
-  double getBiasAndDerivatives(const std::vector<double>& cv, std::vector<double>& der);
-  double evaluateGaussian(const std::vector<double>& cv, const Gaussian& hill);
-  double evaluateGaussianAndDerivatives(const std::vector<double>& cv, const Gaussian& hill, std::vector<double>& der, std::vector<double>& dp);
-  bool scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height);
-  void paraSketch();
-  MPS createTTCoeff() const;
-  std::pair<std::vector<ITensor>, IndexSet> intBasisSample(const IndexSet& is) const;
-  std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> formTensorMoment(const std::vector<ITensor>& M, const MPS& coeff, const IndexSet& is);
-  std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> formTensorMomentVb(const MPS& coeff);
-
-public:
-  explicit TTMetaD(const ActionOptions&);
-  void calculate() override;
-  void update() override;
-  static void registerKeywords(Keywords& keys);
-};
-
-PLUMED_REGISTER_ACTION(TTMetaD, "TTMETAD")
-
-void TTMetaD::registerKeywords(Keywords& keys) {
-  Bias::registerKeywords(keys);
-  keys.addFlag("KERNEL_BASIS", false, "Specifies that local kernel basis should be used instead of Fourier basis");
-  keys.add("compulsory", "SIGMA", "the widths of the Gaussian hills");
-  keys.add("compulsory", "PACE", "the frequency for hill addition");
-  keys.add("compulsory", "FILE", "HILLS", "a file in which the list of added hills is stored");
-  keys.add("compulsory", "HEIGHT", "the heights of the Gaussian hills");
-  keys.add("optional", "FMT", "specify format for HILLS files (useful for decrease the number of digits in regtests)");
-  keys.add("optional", "BIASFACTOR", "use well tempered metadynamics and use this bias factor. Please note you must also specify temp");
-  keys.add("optional", "TEMP", "the system temperature - this is only needed if you are doing well-tempered metadynamics");
-  keys.addFlag("WALKERS_MPI", false, "To be used when gromacs + multiple walkers are used");
-  keys.add("optional", "WALKERS_DIR", "shared directory with the hills files from all the walkers");
-  keys.use("RESTART");
-  keys.add("optional", "SKETCH_RANK", "Target rank for TTSketch algorithm - compulsory if SKETCH_CUTOFF is not specified");
-  keys.add("optional", "SKETCH_CUTOFF", "Truncation error cutoff for singular value decomposition - compulsory if SKETCH_RANK is not specified");
-  keys.add("compulsory", "SKETCH_INITRANK", "Initial rank for TTSketch algorithm");
-  keys.add("compulsory", "SKETCH_PACE", "1e6", "The frequency for TT Vbias updates");
-  keys.add("compulsory", "INTERVAL_MIN", "Lower limits, outside the limits the system will not feel the biasing force");
-  keys.add("compulsory", "INTERVAL_MAX", "Upper limits, outside the limits the system will not feel the biasing force");
-  keys.add("compulsory", "SKETCH_NBASIS", "20", "Number of basis functions per dimension");
-  keys.add("compulsory", "SKETCH_ALPHA", "0.05", "Weight coefficient for random tensor train construction");
-  keys.add("optional", "SKETCH_UNTIL", "After this time, the bias potential freezes");
-  keys.add("optional", "SKETCH_WIDTH", "Width of Gaussian kernels for smoothing");
-  keys.add("optional", "KERNEL_DX", "Width of basis function kernels");
-  keys.addFlag("NONINTRUSIVE", false, "Sketching uses previous exact sum of Gaussians instead of TT approximation");
-  keys.addFlag("DETERMINISTIC", false, "Use a fixed random seed for TT sketch construction, ensuring reproducible results. Intended for regression testing.");
-}
-
-TTMetaD::TTMetaD(const ActionOptions& ao):
-  PLUMED_BIAS_INIT(ao),
-  kbt_(0.0),
-  stride_(0),
-  welltemp_(false),
-  biasf_(-1.0),
-  isFirstStep_(true),
-  height0_(std::numeric_limits<double>::max()),
-  mw_dir_(""),
-  walkers_mpi_(false),
-  mpi_size_(0),
-  mpi_rank_(0),
-  sketch_r_(0),
-  sketch_cutoff_(0.0),
-  sketch_count_(1),
-  sketch_until_(std::numeric_limits<double>::max()),
-  frozen_(false),
-  sketch_conv_(false),
-  nonintrusive_(false),
-  deterministic_(false) {
-  bool kernel;
-  parseFlag("KERNEL_BASIS", kernel);
-  this->d_ = getNumberOfArguments();
-  if(this->d_ < 2) {
-    error("Number of arguments must be at least 2");
-  }
-  parse("FMT", this->fmt_);
-  parseVector("SIGMA", this->sigma0_);
-  if(this->sigma0_.size() != d_) {
-    error("number of arguments does not match number of SIGMA parameters");
-  }
-  parse("HEIGHT", this->height0_);
-  parse("PACE", this->stride_);
-  if(stride_ <= 0) {
-    error("frequency for hill addition is nonsensical");
-  }
-  this->hillsfname_ = "HILLS";
-  parse("FILE", this->hillsfname_);
-  parse("BIASFACTOR", this->biasf_);
-  if(this->biasf_ < 1.0 && this->biasf_ != -1.0) {
-    error("well tempered bias factor is nonsensical");
-  }
-  this->kbt_ = getkBT();
-  if(this->biasf_ >= 1.0) {
-    if(this->kbt_ == 0.0) {
-      error("Unless the MD engine passes the temperature to plumed, with well-tempered metad you must specify it using TEMP");
-    }
-    this->welltemp_ = true;
-  }
-
-  parseFlag("WALKERS_MPI", this->walkers_mpi_);
-  parse("WALKERS_DIR", this->mw_dir_);
-  if(this->walkers_mpi_ && this->mw_dir_== "") {
-    const std::string ret = std::filesystem::current_path();
-    this->mw_dir_ = ret + "/";
-    multi_sim_comm.Bcast(this->mw_dir_, 0);
-  }
-  if(this->walkers_mpi_ && this->mw_dir_ != "") {
-    this->hillsfname_ = this->mw_dir_ + "/" + this->hillsfname_;
-  }
-
-  parse("SKETCH_RANK", this->sketch_r_);
-  parse("SKETCH_CUTOFF", this->sketch_cutoff_);
-  if(this->sketch_r_ <= 0 && (this->sketch_cutoff_ <= 0.0 || this->sketch_cutoff_ > 1.0)) {
-    error("Valid SKETCH_RANK or SKETCH_CUTOFF needs to be specified");
-  }
-  parse("SKETCH_INITRANK", this->sketch_rc_);
-  if(this->sketch_rc_ <= 0) {
-    error("SKETCH_INITRANK must be positive");
-  }
-  parse("SKETCH_PACE", this->sketch_stride_);
-  if(this->sketch_stride_ <= 0) {
-    error("SKETCH_PACE must be positive");
-  }
-  std::vector<double> interval_min;
-  parseVector("INTERVAL_MIN", interval_min);
-  if(interval_min.size() != this->d_) {
-    error("Number of arguments does not match number of INTERVAL_MIN parameters");
-  }
-  std::vector<double> interval_max;
-  parseVector("INTERVAL_MAX", interval_max);
-  if(interval_max.size() != this->d_) {
-    error("Number of arguments does not match number of INTERVAL_MAX parameters");
-  }
-  int nbasis = 20;
-  parse("SKETCH_NBASIS", nbasis);
-  if(nbasis <= 1) {
-    error("SKETCH_NBASIS must be greater than 1");
-  }
-  if(!kernel && nbasis % 2 == 0) {
-    ++nbasis;
-  }
-  parse("SKETCH_ALPHA", this->sketch_alpha_);
-  if(this->sketch_alpha_ <= 0.0 || this->sketch_alpha_ > 1.0) {
-    error("SKETCH_ALPHA must be positive and no greater than 1");
-  }
-  std::vector<double> w;
-  parseVector("SKETCH_WIDTH", w);
-  if(w.size() == 0) {
-    w.assign(this->d_, 0.0);
-  }
-  if(w.size() != this->d_) {
-    error("Number of arguments does not match number of SKETCH_WIDTH parameters");
-  }
-  for (double val : w) {
-    if (val != 0.0) {
-      this->sketch_conv_ = true;
-    }
-  }
-  std::vector<double> dx;
-  parseVector("KERNEL_DX", dx);
-  if(dx.size() == 0) {
-    dx.resize(this->d_, 0.0);
-  }
-  if(dx.size() != this->d_) {
-    error("Number of arguments does not match number of KERNEL_DX parameters");
-  }
-  for(unsigned i = 0; i < this->d_; ++i) {
-    if(this->sketch_conv_ && w[i] <= 0.0) {
-      error("Gaussian smoothing requires positive WIDTH");
-    }
-    if(kernel && dx[i] < 0.0) {
-      error("Kernel basis requires positive KERNEL_DX");
-    }
-    if(interval_max[i] <= interval_min[i]) {
-      error("INTERVAL_MAX parameters need to be greater than respective INTERVAL_MIN parameters");
-    }
-    this->sketch_basis_.push_back(BasisFunc(std::make_pair(interval_min[i], interval_max[i]), nbasis, w[i], kernel, dx[i]));
-  }
-  if(kernel && this->sketch_conv_) {
-    error("kernel smoothing incompatible with kernel basis");
-  }
-  if(this->walkers_mpi_) {
-    this->mpi_size_ = multi_sim_comm.Get_size();
-    this->mpi_rank_ = multi_sim_comm.Get_rank();
-  }
-
-  parse("SKETCH_UNTIL", this->sketch_until_);
-
-  parseFlag("NONINTRUSIVE", this->nonintrusive_);
-  parseFlag("DETERMINISTIC", this->deterministic_);
-
-  if(getRestart()) {
-    std::string ttfilename = "ttsketch.h5";
-    if(this->walkers_mpi_) {
-      ttfilename = "../" + ttfilename;
-    }
-    while(true) {
-      try {
-        this->vb_ = ttRead(ttfilename, ++this->sketch_count_);
-      } catch(...) {
-        --this->sketch_count_;
-        break;
-      }
-    }
-    if(this->sketch_count_ == 1) {
-      this->vb_ = MPS();
-    }
-    if(getTime() >= this->sketch_until_) {
-      this->frozen_ = true;
-    } else {
-      IFile hills_ifile;
-      if(!hills_ifile.FileExist(this->hillsfname_)) {
-        error("The hills file cannot be found");
-      }
-      hills_ifile.open(this->hillsfname_);
-      readGaussians(&hills_ifile);
-      hills_ifile.close();
-    }
-  }
-
-  if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
-    this->hillsOfile_.link(*this);
-    this->hillsOfile_.enforceSuffix("");
-    this->hillsOfile_.open(this->hillsfname_);
-    if(this->fmt_.length() > 0) {
-      this->hillsOfile_.fmtField(this->fmt_);
-    }
-    hillsOfile_.setHeavyFlush();
-    for(unsigned i = 0; i < this->d_; ++i) {
-      hillsOfile_.setupPrintValue(getPntrToArgument(i));
-    }
-  }
-}
-
-void TTMetaD::readGaussians(IFile *ifile) {
-  std::vector<double> center(this->d_);
-  std::vector<double> sigma(this->d_);
-  double height;
-  int nhills = 0;
-
-  std::vector<Value> tmpvalues;
-  for(unsigned j = 0; j < this->d_; ++j) {
-    tmpvalues.push_back(Value(this, getPntrToArgument(j)->getName(), false));
-  }
-
-  while(scanOneHill(ifile, tmpvalues, center, sigma, height)) {
-    ++nhills;
-    if(this->welltemp_ && this->biasf_ > 1.0) {
-      height *= (this->biasf_ - 1.0) / this->biasf_;
-    }
-    this->hills_.push_back(Gaussian(height, center, sigma));
-  }
-  log << "  restarting from step " << this->sketch_count_ << "\n";
-  log << "  " << nhills << " hills retrieved\n";
-
-  log << "  Bibliography " << plumed.cite("Strand, Yang, Khoo, and Dinner, https://arxiv.org/abs/2603.13549 (2026)");
-  log << plumed.cite("Fishman, White, and Stoudenmire, https://arxiv.org/abs/2007.14822 (2020)");
-  log << "\n";
-}
-
-bool TTMetaD::scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height) {
-  double dummy;
-  if(ifile->scanField("time", dummy)) {
-    unsigned ncv = tmpvalues.size();
-    for(unsigned i = 0; i < ncv; ++i) {
-      ifile->scanField(&tmpvalues[i]);
-      if(tmpvalues[i].isPeriodic() && !getPntrToArgument(i)->isPeriodic()) {
-        error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
-      } else if(tmpvalues[i].isPeriodic()) {
-        std::string imin, imax;
-        tmpvalues[i].getDomain(imin, imax);
-        std::string rmin, rmax;
-        getPntrToArgument(i)->getDomain(rmin, rmax);
-        if(imin != rmin || imax != rmax) {
-          error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
-        }
-      }
-      center[i] = tmpvalues[i].get();
-    }
-    for(unsigned i = 0; i < ncv; ++i) {
-      ifile->scanField("sigma_" + getPntrToArgument(i)->getName(), sigma[i]);
-    }
-
-    ifile->scanField("height", height);
-    ifile->scanField("biasf", dummy);
-    ifile->scanField();
-    return true;
-  } else {
-    return false;
-  }
-}
-
-void TTMetaD::writeGaussian(const Gaussian& hill, OFile&file) {
-  file.printField("time", getTimeStep() * getStep());
-  for(unsigned i = 0; i < this->d_; ++i) {
-    file.printField(getPntrToArgument(i), hill.center[i]);
-  }
-  for(unsigned i = 0; i < this->d_; ++i) {
-    file.printField("sigma_" + getPntrToArgument(i)->getName(), hill.sigma[i]);
-  }
-  double height = hill.height;
-  if(this->welltemp_ && this->biasf_ > 1.0) {
-    height *= this->biasf_ / (this->biasf_ - 1.0);
-  }
-  file.printField("height", height).printField("biasf", this->biasf_);
-  file.printField();
-}
-
-// Called every MD step: evaluate total bias (TT + remaining Gaussians) and set forces.
-void TTMetaD::calculate() {
-  std::vector<double> cv(this->d_);
-  for(unsigned i = 0; i < this->d_; ++i) {
-    cv[i] = getArgument(i);
-  }
-
-  std::vector<double> der(this->d_, 0.0);
-
-  double ene = getBiasAndDerivatives(cv, der);
-  setBias(ene);
-  for(unsigned i = 0; i < this->d_; ++i) {
-    setOutputForce(i, -der[i]);
-  }
-}
-
-// Called every MD step after forces are applied.
-// Two separate triggers:
-//   1. Every sketch_stride_ steps: run paraSketch() to compress accumulated hills into vb_,
-//      clear the hills list, and rewind the HILLS file for the next interval.
-//   2. Every stride_ steps: deposit a new Gaussian hill and write it to the HILLS file.
-void TTMetaD::update() {
-  bool nowAddATT;
-  if(getStep() % this->sketch_stride_ == 0 && !this->isFirstStep_ && !this->frozen_) {
-    nowAddATT = true;
-    if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
-      this->hillsOfile_.flush();
-    }
-  } else {
-    nowAddATT = false;
-  }
-
-  if(nowAddATT) {
-    if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
-      unsigned N = this->hills_.size();
-      log << "Sample limits\n";
-      for(unsigned i = 0; i < this->d_; ++i) {
-        auto [large, small] = this->sketch_basis_[i].dom();
-        for(unsigned j = 0; j < N; ++j) {
-          if(this->hills_[j].center[i] > large) {
-            large = this->hills_[j].center[i];
-          }
-          if(this->hills_[j].center[i] < small) {
-            small = this->hills_[j].center[i];
-          }
-        }
-        log << small << " " << large << "\n";
-      }
-
-      log << "\nEmpirical means:\n";
-      Matrix<double> sigmahat(this->d_, this->d_);
-      std::vector<double> muhat(this->d_, 0.0);
-      for(unsigned k = 0; k < this->d_; ++k) {
-        for(unsigned j = 0; j < N; ++j) {
-          muhat[k] += this->hills_[j].center[k] / N;
-        }
-        log << muhat[k] << " ";
-      }
-      log << "\nEmpirical covariance matrix:\n";
-      for(unsigned k = 0; k < this->d_; ++k) {
-        for(unsigned l = k; l < this->d_; ++l) {
-          sigmahat(k, l) = sigmahat(l, k) = 0.0;
-          for(unsigned j = 0; j < N; ++j) {
-            sigmahat(k, l) += (this->hills_[j].center[k] - muhat[k]) * (this->hills_[j].center[l] - muhat[l]) / (N - 1);
-          }
-          sigmahat(l, k) = sigmahat(k, l);
-        }
-      }
-      matrixOut(log, sigmahat);
-
-      // record current bias at hill centers before update, for computing relative error after
-      std::vector<double> A0(N);
-      Matrix<double> x(N, this->d_);
-      for(unsigned i = 0; i < N; ++i) {
-        for(unsigned j = 0; j < this->d_; ++j) {
-          x(i, j) = this->hills_[i].center[j];
-        }
-        A0[i] = getBias(this->hills_[i].center);
-      }
-
-      log << "\nStarting TT-sketch...\n";
-      log.flush();
-      paraSketch();
-      ++this->sketch_count_;
-
-      this->hills_.clear();
-
-      // compute relative L2 approximation error at hill centers: ||V_new - V_old|| / ||V_old||
-      std::vector<double> diff(N);
-      for(unsigned i = 0; i < N; ++i) {
-        std::vector<double> xi(this->d_);
-        for(unsigned j = 0; j < this->d_; ++j) {
-          xi[j] = x(i, j);
-        }
-        diff[i] = getBias(xi);
-      }
-      std::transform(diff.begin(), diff.end(), A0.begin(), diff.begin(), std::minus<double>());
-      log << "Relative l2 error = " << sqrt(norm(diff) / norm(A0)) << "\n\n";
-      log.flush();
-
-      std::string ttfilename = "ttsketch.h5";
-      if(this->walkers_mpi_) {
-        ttfilename = "../" + ttfilename;
-      }
-      ttWrite(ttfilename, this->vb_, this->sketch_count_);
-    }
-
-    if(this->walkers_mpi_) {
-      multi_sim_comm.Bcast(this->sketch_count_, 0);
-      if(this->mpi_rank_ != 0) {
-        this->hills_.clear();
-        this->vb_ = ttRead("../ttsketch.h5", this->sketch_count_);
-      }
-    }
-    if(getTime() >= this->sketch_until_) {
-      this->frozen_ = true;
-    } else if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
-      this->hillsOfile_.rewind();
-      this->hillsOfile_.clearFields();
-      if(this->fmt_.length() > 0) {
-        this->hillsOfile_.fmtField(this->fmt_);
-      }
-      hillsOfile_.setHeavyFlush();
-      for(unsigned i = 0; i < this->d_; ++i) {
-        hillsOfile_.setupPrintValue(getPntrToArgument(i));
-      }
-    }
-  }
-
-  bool nowAddAHill;
-  if(getStep() % this->stride_ == 0 && !isFirstStep_ && !this->frozen_) {
-    nowAddAHill = true;
-  } else {
-    nowAddAHill = false;
-    this->isFirstStep_ = false;
-  }
-
-  std::vector<double> cv(this->d_);
-  for(unsigned i = 0; i < this->d_; ++i) {
-    cv[i] = getArgument(i);
-  }
-
-  if(nowAddAHill) {
-    double height = getHeight(cv);
-
-    if(this->walkers_mpi_) {
-      std::vector<double> all_cv(this->mpi_size_ * this->d_, 0.0);
-      std::vector<double> all_sigma(this->mpi_size_ * this->sigma0_.size(), 0.0);
-      std::vector<double> all_height(this->mpi_size_, 0.0);
-      multi_sim_comm.Allgather(cv, all_cv);
-      multi_sim_comm.Allgather(this->sigma0_, all_sigma);
-      multi_sim_comm.Allgather(height * (this->biasf_ > 1.0 ? this->biasf_ / (this->biasf_ - 1.0) : 1.0), all_height);
-
-      for(int i = 0; i < this->mpi_size_; i++) {
-        std::vector<double> cv_now(this->d_);
-        std::vector<double> sigma_now(this->sigma0_.size());
-        for(unsigned j = 0; j < this->d_; j++) {
-          cv_now[j] = all_cv[i * this->d_ + j];
-        }
-        for(unsigned j = 0; j < this->sigma0_.size(); j++) {
-          sigma_now[j] = all_sigma[i * this->sigma0_.size() + j];
-        }
-        double fact = (this->biasf_ > 1.0 ? (this->biasf_ - 1.0) / this->biasf_ : 1.0);
-        Gaussian newhill(all_height[i] * fact, cv_now, sigma_now);
-        this->hills_.push_back(newhill);
-        if(this->mpi_rank_ == 0) {
-          writeGaussian(newhill, hillsOfile_);
-        }
-      }
-    } else {
-      Gaussian newhill(height, cv, this->sigma0_);
-      this->hills_.push_back(newhill);
-      writeGaussian(newhill, hillsOfile_);
-    }
-  }
-
-  if(getStep() % this->sketch_stride_ == 1 && !this->frozen_) {
-    log << "Vbias update " << this->sketch_count_ << "...\n\n";
-    log.flush();
-  }
-}
-
-double TTMetaD::getHeight(const std::vector<double>& cv) {
-  double height = this->height0_;
-  if(this->welltemp_) {
-    double vbias = getBias(cv);
-    if(this->biasf_ > 1.0) {
-      height = this->height0_ * exp(-vbias / (this->kbt_ * (this->biasf_ - 1.0)));
-    } else {
-      height = this->height0_ * exp(-vbias / this->kbt_);
-    }
-  }
-  return height;
-}
-
-// Total bias = TT approximation of accumulated hills + sum of recently deposited Gaussians.
-double TTMetaD::getBias(const std::vector<double>& cv) {
-  double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
-  unsigned nt = OpenMP::getNumThreads();
-  #pragma omp parallel num_threads(nt)
-  {
-    #pragma omp for reduction(+:bias) nowait
-    for(unsigned i = 0; i < hills_.size(); ++i) {
-      bias += evaluateGaussian(cv, this->hills_[i]);
-    }
-  }
-  return bias;
-}
-
-double TTMetaD::getBiasAndDerivatives(const std::vector<double>& cv, std::vector<double>& der) {
-  double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
-  if(length(this->vb_) != 0) {
-    der = ttGrad(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
-  }
-  unsigned nt = OpenMP::getNumThreads();
-  if(this->hills_.size() < 2 * nt || nt == 1) {
-    std::vector<double> dp(this->d_);
-    for(unsigned i = 0; i < this->hills_.size(); ++i) {
-      bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], der, dp);
-    }
-  } else {
-    #pragma omp parallel num_threads(nt)
-    {
-      std::vector<double> omp_deriv(this->d_, 0.0);
-      std::vector<double> dp(this->d_);
-      #pragma omp for reduction(+:bias) nowait
-      for(unsigned i = 0; i < this->hills_.size(); ++i) {
-        bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], omp_deriv, dp);
-      }
-      #pragma omp critical
-      for(unsigned i = 0; i < this->d_; ++i) {
-        der[i] += omp_deriv[i];
-      }
-    }
-  }
-  return bias;
-}
-
-double TTMetaD::evaluateGaussian(const std::vector<double>& cv, const Gaussian& hill) {
-  double dp2 = 0.0;
-  for(unsigned i = 0; i < this->d_; i++) {
-    double dp = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
-    dp2 += dp * dp;
-  }
-  dp2 *= 0.5;
-
-  double bias = 0.0;
-  if(dp2 < dp2cutoff) {
-    bias = hill.height * exp(-dp2);
-  }
-
-  return bias;
-}
-
-double TTMetaD::evaluateGaussianAndDerivatives(const std::vector<double>& cv, const Gaussian& hill, std::vector<double>& der, std::vector<double>& dp) {
-  double dp2 = 0.0;
-  double bias = 0.0;
-  for(unsigned i = 0; i < this->d_; i++) {
-    dp[i] = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
-    dp2 += dp[i] * dp[i];
-  }
-  dp2 *= 0.5;
-  if(dp2 < dp2cutoff) {
-    bias = hill.height * exp(-dp2);
-    for(unsigned i = 0; i < this->d_; i++) {
-      der[i] -= bias * dp[i] * hill.invsigma[i];
-    }
-  }
-
-  return bias;
-}
-
-// TT-Sketch algorithm: compresses the N accumulated Gaussian hills plus the previous
-// TT bias vb_ into a new low-rank TT approximation G, which becomes the updated vb_.
-//
-// Overview (see paper Algorithm 2):
-//   1. createTTCoeff(): build a random sketch coefficient TT `coeff` of rank sketch_rc_.
-//   2. intBasisSample(): compute M[i][j,k] = <phi_k, g_j>_{dim i}, the inner product of
-//      each basis function with each Gaussian along dimension i.
-//   3. formTensorMoment(): contract coeff with M to get the tensor moment Bemp and
-//      left/right environment tensors envi_L, envi_R needed to form the normal equations.
-//   4. For each bond between cores k and k+1:
-//      - Form the cross-gram matrix A from envi_L and envi_R.
-//      - SVD A to get the rank-trimmed projection V (truncating to sketch_r_ or sketch_cutoff_).
-//   5. Recover TT cores G from Bemp and the pseudo-inverse of U*S.
-//   6. Apply Gram matrix correction (kernel basis only) to convert to dual basis coefficients.
-void TTMetaD::paraSketch() {
-  unsigned N = this->hills_.size();
-  auto coeff = createTTCoeff();
-  auto [M, is] = intBasisSample(siteInds(coeff));
-  MPS G(this->d_);
-
-  auto [Bemp, envi_L, envi_R] = formTensorMoment(M, coeff, is);
-  MPS Bemp_Vb;
-  std::vector<ITensor> envi_L_Vb;
-  std::vector<ITensor> envi_R_Vb;
-  if(this->sketch_count_ != 1) {
-    if(this->nonintrusive_) {
-      // Non-intrusive variant: accumulate tensor moments directly across sketches.
-      // B_prev_ stores the sum of Bemp from all prior intervals; re-index its bond/site
-      // indices to match the current coeff TT's indices before adding.
-      auto sites = siteInds(Bemp);
-      auto sites_prev = siteInds(this->B_prev_);
-      auto links = linkInds(Bemp);
-      auto links_prev = linkInds(this->B_prev_);
-      for(unsigned i = 1; i <= this->d_; ++i) {
-        this->B_prev_.ref(i) *= delta(sites(i), sites_prev(i));
-        if(i != 1) {
-          this->B_prev_.ref(i) *= delta(links(i - 1), links_prev(i - 1));
-        }
-        if (i != this->d_) {
-          this->B_prev_.ref(i) *= delta(links(i), links_prev(i));
-        }
-        Bemp.ref(i) += this->B_prev_(i);
-      }
-    } else {
-      // Intrusive variant: project the previous TT bias vb_ onto the current sketch's
-      // basis by pre-multiplying each core by the Gram matrix (kernel basis) or the
-      // convolution inner-product matrix (Fourier conv mode), then add its tensor moment.
-      if(this->sketch_basis_[0].kernel()) {
-        // Convert vb_ from dual basis to standard basis by applying Gram matrix per core
-        for(unsigned i = 1; i <= this->d_; ++i) {
-          auto s = siteIndex(this->vb_, i);
-          ITensor gram(s, prime(s));
-          for(int j = 1; j <= dim(s); ++j) {
-            for(int l = 1; l <= dim(s); ++l) {
-              gram.set(s = j, prime(s) = l, this->sketch_basis_[i - 1].gram()(j - 1, l - 1));
-            }
-          }
-          this->vb_.ref(i) *= gram;
-          this->vb_.ref(i).noPrime();
-        }
-      }
-      if(this->sketch_conv_) {
-        // Apply Fourier convolution damping factors to vb_ to account for basis smoothing
-        for(unsigned i = 1; i <= this->d_; ++i) {
-          double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
-          double w = this->sketch_basis_[i - 1].w();
-          auto s = siteIndex(this->vb_, i);
-          ITensor inner(s, prime(s));
-          for(int j = 1; j < dim(s); ++j) {
-            inner.set(s = j, prime(s) = j, exp(-pow(M_PI * w * (j / 2), 2) / (2 * pow(L, 2))));
-          }
-          this->vb_.ref(i) *= inner;
-          this->vb_.ref(i).noPrime();
-        }
-      }
-      auto vbresult = formTensorMomentVb(coeff);
-      Bemp_Vb = std::get<0>(vbresult);
-      envi_L_Vb = std::get<1>(vbresult);
-      envi_R_Vb = std::get<2>(vbresult);
-      for(unsigned i = 1; i <= this->d_; ++i) {
-        Bemp.ref(i) += Bemp_Vb(i);
-      }
-    }
-  }
-  if(this->nonintrusive_) {
-    this->B_prev_ = Bemp;
-    if(this->sketch_count_ == 1) {
-      this->A_prev_.resize(this->d_ - 1);
-    }
-  }
-  // For each bond between cores k-1 and k, form the cross-gram matrix A = L^T * R,
-  // where L = envi_L[k] and R = envi_R[k-2] are the left/right environment projections
-  // of coeff onto the sample indices. SVD A to find the rank-trimmed subspace V.
-  auto links = linkInds(coeff);
-  std::vector<ITensor> U(this->d_), S(this->d_), V(this->d_);
-  std::vector<Index> links_trimmed;
-  for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
-    int rank = dim(links(core_id - 1));
-    Matrix<double> LMat(N, rank), RMat(N, rank);
-    for(unsigned i = 1; i <= N; ++i) {
-      for(int j = 1; j <= rank; ++j) {
-        LMat(i - 1, j - 1) = envi_L[core_id - 1].elt(is(core_id) = i, links(core_id - 1) = j);
-        RMat(i - 1, j - 1) = envi_R[core_id - 2].elt(is(core_id - 1) = i, links(core_id - 1) = j);
-      }
-    }
-    Matrix<double> Lt, AMat, PMat, AMat_Vb;
-    transpose(LMat, Lt);
-    mult(Lt, RMat, AMat);
-
-    if(this->sketch_count_ != 1 && !this->nonintrusive_) {
-      auto ivb = linkIndex(this->vb_, core_id - 1);
-      int rank_vb = dim(ivb);
-      LMat = Matrix<double>(rank_vb, rank);
-      RMat = Matrix<double>(rank_vb, rank);
-      for(int i = 1; i <= rank_vb; ++i) {
-        for(int j = 1; j <= rank; ++j) {
-          LMat(i - 1, j - 1) = envi_L_Vb[core_id - 1].elt(ivb = i, links(core_id - 1) = j);
-          RMat(i - 1, j - 1) = envi_R_Vb[core_id - 2].elt(ivb = i, links(core_id - 1) = j);
-        }
-      }
-      transpose(LMat, Lt);
-      mult(Lt, RMat, AMat_Vb);
-    }
-
-    ITensor A(prime(links(core_id - 1)), links(core_id - 1));
-    for(int i = 1; i <= rank; ++i) {
-      for(int j = 1; j <= rank; ++j) {
-        A.set(prime(links(core_id - 1)) = i, links(core_id - 1) = j, AMat(i - 1, j - 1));
-      }
-    }
-    if(this->sketch_count_ != 1) {
-      if(this->nonintrusive_) {
-        this->A_prev_[core_id - 2] *= delta(this->A_prev_[core_id - 2].index(1), A.index(1));
-        this->A_prev_[core_id - 2] *= delta(this->A_prev_[core_id - 2].index(2), A.index(2));
-        A += this->A_prev_[core_id - 2];
-      } else {
-        ITensor A_Vb(prime(links(core_id - 1)), links(core_id - 1));
-        for(int i = 1; i <= rank; ++i) {
-          for(int j = 1; j <= rank; ++j) {
-            A_Vb.set(prime(links(core_id - 1)) = i, links(core_id - 1) = j, AMat_Vb(i - 1, j - 1));
-          }
-        }
-        A += A_Vb;
-      }
-    }
-    if(this->nonintrusive_) {
-      this->A_prev_[core_id - 2] = A;
-    }
-    auto original_link_tags = tags(links(core_id - 1));
-    V[core_id - 1] = ITensor(links(core_id - 1));
-    if(this->sketch_r_ > 0) {
-      svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1],
-      {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags, "MaxDim=", this->sketch_r_});
-    } else {
-      svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1], {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags});
-    }
-    links_trimmed.push_back(commonIndex(S[core_id - 1], V[core_id - 1]));
-  }
-
-  // Recover TT cores G from Bemp and the trimmed subspaces.
-  // G[1] = Bemp[1] * V[1] (project first core onto rank-trimmed subspace)
-  // G[k] = pinv(U[k]*S[k]) * Bemp[k] * V[k]  for 2 <= k < d
-  // G[d] = pinv(U[d]*S[d]) * Bemp[d]
-  G.ref(1) = Bemp(1) * V[1];
-  for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
-    int rank = dim(links(core_id - 1)), rank_trimmed = dim(links_trimmed[core_id - 2]);
-    ITensor A = U[core_id - 1] * S[core_id - 1];
-    ITensor Pinv(links_trimmed[core_id - 2], links(core_id - 1));
-    Matrix<double> AMat(rank, rank_trimmed), PMat;
-    for(int i = 1; i <= rank; ++i) {
-      for(int j = 1; j <= rank_trimmed; ++j) {
-        AMat(i - 1, j - 1) = A.elt(prime(links(core_id - 1)) = i, links_trimmed[core_id - 2] = j);
-      }
-    }
-    pseudoInvert(AMat, PMat);
-
-    for(int i = 1; i <= rank_trimmed; ++i) {
-      for(int j = 1; j <= rank; ++j) {
-        Pinv.set(links_trimmed[core_id - 2] = i, links(core_id - 1) = j, PMat(i - 1, j - 1));
-      }
-    }
-    G.ref(core_id) = Pinv * Bemp(core_id);
-    if(core_id != this->d_) {
-      G.ref(core_id) *= V[core_id];
-    }
-  }
-
-  log << "Final ranks ";
-  for(unsigned i = 1; i < this->d_; ++i) {
-    log << dim(linkIndex(G, i)) << " ";
-  }
-  log << "\n";
-  log.flush();
-
-  if(this->sketch_basis_[0].kernel()) {
-    // Apply pseudo-inverse of the Gram matrix to each core to convert from the
-    // primal Gram representation back to dual basis coefficients (G^+ * G_core).
-    for(unsigned i = 1; i <= this->d_; ++i) {
-      auto s = siteIndex(G, i);
-      ITensor ginv(s, prime(s));
-      for(int j = 1; j <= dim(s); ++j) {
-        for(int l = 1; l <= dim(s); ++l) {
-          ginv.set(s = j, prime(s) = l, this->sketch_basis_[i - 1].ginv()(j - 1, l - 1));
-        }
-      }
-      G.ref(i) *= ginv;
-      G.ref(i).noPrime();
-    }
-  }
-
-  this->vb_ = G;
-}
-
-// Build the random sketch coefficient TT `coeff` of bond dimension sketch_rc_.
-// Each element is drawn from N(0,1); then each core is multiplied by a diagonal
-// matrix diag(1, alpha, alpha, ...) so the constant basis function (pos=1) has
-// full weight while all other harmonics are scaled down by sketch_alpha_. This
-// ensures the sketch captures the large constant component of the bias accurately.
-MPS TTMetaD::createTTCoeff() const {
-  std::default_random_engine generator(this->deterministic_ ? 42u : static_cast<unsigned int>(time(nullptr)));
-  std::normal_distribution<double> distribution(0.0, 1.0);
-  int n = this->sketch_basis_[0].nbasis();
-  auto sites = SiteSet(this->d_, n);
-  auto coeff = MPS(sites, this->sketch_rc_);
-  for(int j = 1; j <= n; ++j) {
-    for(int k = 1; k <= this->sketch_rc_; ++k) {
-      coeff.ref(1).set(sites(1) = j, linkIndex(coeff, 1) = k, distribution(generator));
-    }
-  }
-  for(unsigned i = 2; i <= this->d_ - 1; ++i) {
-    for(int j = 1; j <= n; ++j) {
-      for(int k = 1; k <= this->sketch_rc_; ++k) {
-        for(int l = 1; l <= this->sketch_rc_; ++l) {
-          coeff.ref(i).set(sites(i) = j, linkIndex(coeff, i - 1) = k, linkIndex(coeff, i) = l, distribution(generator));
-        }
-      }
-    }
-  }
-  for(int j = 1; j <= n; ++j) {
-    for(int k = 1; k <= this->sketch_rc_; ++k) {
-      coeff.ref(this->d_).set(sites(this->d_) = j, linkIndex(coeff, this->d_ - 1) = k, distribution(generator));
-    }
-  }
-  for(unsigned i = 1; i <= this->d_; ++i) {
-    auto s = sites(i);
-    auto sp = prime(s);
-    std::vector<double> Avec(n, this->sketch_alpha_);
-    Avec[0] = 1.0;
-    auto A = diagITensor(Avec, s, sp);
-    coeff.ref(i) *= A;
-    coeff.ref(i).noPrime();
-  }
-  return coeff;
-}
-
-// Compute the matrix M[dim i][sample j, basis k] = <phi_k, g_j>_{dim i}, the
-// analytical inner product of each 1D basis function with the 1D marginal of each
-// Gaussian hill along dimension i. The d-dimensional Gaussian factorizes as a
-// product of d 1D Gaussians, so we distribute height as h^(1/d) per dimension.
-// These integrals are computed in closed form:
-//   - Kernel basis: convolution of the kernel function with the 1D Gaussian hill
-//   - Fourier basis: Fourier transform of the 1D Gaussian (exponential decay * harmonic)
-// Returns M as a vector of ITensors (one per dimension) and a new IndexSet is_new
-// where is_new(i) indexes the N sample points for dimension i.
-std::pair<std::vector<ITensor>, IndexSet> TTMetaD::intBasisSample(const IndexSet& is) const {
-  unsigned N = this->hills_.size();
-  int nb = this->sketch_basis_[0].nbasis();
-  auto sites_new = SiteSet(this->d_, N);
-  std::vector<ITensor> M;
-  std::vector<Index> is_new;
-  for(unsigned i = 1; i <= this->d_; ++i) {
-    double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
-    double a = (this->sketch_basis_[i - 1].dom().second + this->sketch_basis_[i - 1].dom().first) / 2;
-    M.push_back(ITensor(sites_new(i), is(i)));
-    is_new.push_back(sites_new(i));
-    for(unsigned j = 1; j <= N; ++j) {
-      double x = this->hills_[j - 1].center[i - 1];
-      double w = this->hills_[j - 1].sigma[i - 1];
-      double h = pow(this->hills_[j - 1].height, 1.0 / this->d_);  // per-dimension d-th root of height
-      for(int pos = 1; pos <= nb; ++pos) {
-        double result = 0.0;
-        if(this->sketch_basis_[0].kernel()) {
-          if(pos == 1) {
-            // <1, g_j> = integral of 1D Gaussian = sqrt(2*pi)*sigma
-            result = h * sqrt(2 * M_PI) * w;
-          } else {
-            // <kernel_c, g_j> = conv of two Gaussians (widths dx and w), with periodic images
-            double c = this->sketch_basis_[i - 1].center(pos - 1);
-            double dx = this->sketch_basis_[i - 1].dx();
-            for(int k = -1; k <= 1; ++k) {
-              result += exp(-pow(x - c + 2 * k * L, 2) / (2 * (pow(dx, 2) + pow(w, 2)))) * h * sqrt(2 * M_PI) * w /
-                        (sqrt(1 / pow(dx, 2) + 1 / pow(w, 2)) * w);
-            }
-          }
-        } else {
-          if(pos == 1) {
-            // <1/sqrt(2L), g_j> = sqrt(pi/L) * sigma (Gaussian times constant)
-            result = h * sqrt(M_PI / L) * w;
-          } else if(pos % 2 == 0) {
-            // <cos harmonic, g_j>: Fourier transform of Gaussian times exponential damping
-            result = exp(-pow(M_PI * w * (pos / 2), 2) / (2 * pow(L, 2))) * h * sqrt(2 * M_PI / L) * w * cos(M_PI * (x - a) * (pos / 2) / L);
-          } else {
-            result = exp(-pow(M_PI * w * (pos / 2), 2) / (2 * pow(L, 2))) * h * sqrt(2 * M_PI / L) * w * sin(M_PI * (x - a) * (pos / 2) / L);
-          }
-        }
-        M.back().set(sites_new(i) = j, is(i) = pos, result);
-      }
-    }
-  }
-  return std::make_pair(M, IndexSet(is_new));
-}
-
-// Compute the tensor moment B and environment tensors for the new Gaussian hills.
-//
-// First, L = coeff with each core's basis index replaced by the sample index via M:
-//   L[i] = coeff[i] * M[i], so L has (sample_index, bond_left, bond_right) indices.
-//
-// envi_L[i] = partial contraction of L(1)...L(i) summed over sample indices 1..N,
-//   yielding shape (sample_index_{i+1}, link_i). Used to form the left half of A.
-// envi_R[i] = partial contraction of L(i+2)...L(d) summed over sample indices,
-//   yielding shape (sample_index_{i+1}, link_{i+1}). Used to form the right half of A.
-//
-// B[k] = sum_j envi_L[k-1][j, :] * envi_R[k-1][j, :] * M[k][:, basis_k],
-//   which is the "tensor moment" for core k: the sketch of the Gaussian sum projected
-//   onto the basis functions of dimension k and the sketch subspaces of all other dims.
-std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorMoment(const std::vector<ITensor>& M, const MPS& coeff, const IndexSet& is) {
-  int N = dim(is(1));
-  auto links = linkInds(coeff);
-  // L[i] = coeff[i] with basis index contracted against M[i] -> sample index
-  auto L = coeff;
-
-  for(unsigned i = 1; i <= this->d_; ++i) {
-    L.ref(i) *= M[i - 1];
-  }
-
-  // Build left environments: envi_L[i] accumulates the contraction of L(1)..L(i)
-  // over the shared sample index, leaving the next sample index and link free.
-  std::vector<ITensor> envi_L(this->d_);
-  envi_L[1] = L(1) * delta(is(1), is(2));
-  for(unsigned i = 2; i < this->d_; ++i) {
-    int rankl = dim(links(i - 1));
-    int rankr = dim(links(i));
-    envi_L[i] = ITensor(is(i + 1), links(i));
-    for(int j = 1; j <= N; ++j) {
-      for(int k = 1; k <= rankr; ++k) {
-        ITensor LHS(links(i - 1)), RHS(links(i - 1));
-        for(int ii = 1; ii <= rankl; ++ii) {
-          LHS.set(links(i - 1) = ii, envi_L[i - 1].elt(is(i) = j, links(i - 1) = ii));
-          RHS.set(links(i - 1) = ii, L(i).elt(links(i - 1) = ii, is(i) = j, links(i) = k));
-        }
-        envi_L[i].set(is(i + 1) = j, links(i) = k, elt(LHS * RHS));
-      }
-    }
-  }
-
-  // Build right environments: envi_R[i] accumulates L(i+2)..L(d) right-to-left.
-  std::vector<ITensor> envi_R(this->d_);
-  envi_R[this->d_ - 2] = L(this->d_) * delta(is(this->d_), is(this->d_ - 1));
-  for(int i = this->d_ - 3; i >= 0; --i) {
-    int rankl = dim(links(i + 1));
-    int rankr = dim(links(i + 2));
-    envi_R[i] = ITensor(is(i + 1), links(i + 1));
-    for(int j = 1; j <= N; ++j) {
-      for(int k = 1; k <= rankl; ++k) {
-        ITensor LHS(links(i + 2)), RHS(links(i + 2));
-        for(int ii = 1; ii <= rankr; ++ii) {
-          LHS.set(links(i + 2) = ii, envi_R[i + 1].elt(is(i + 2) = j, links(i + 2) = ii));
-          RHS.set(links(i + 2) = ii, L(i + 2).elt(links(i + 2) = ii, is(i + 2) = j, links(i + 1) = k));
-        }
-        envi_R[i].set(is(i + 1) = j, links(i + 1) = k, elt(LHS * RHS));
-      }
-    }
-  }
-
-  // Assemble tensor moment B: for each core, combine the left and right environments
-  // element-wise over sample indices, then contract with M to restore basis indices.
-  MPS B(this->d_);
-  B.ref(1) = envi_R[0] * M[0];
-  for(unsigned core_id = 2; core_id < this->d_; ++core_id) {
-    int rankl = dim(links(core_id - 1));
-    int rankr = dim(links(core_id));
-    B.ref(core_id) = ITensor(links(core_id - 1), is(core_id), links(core_id));
-    for(int i = 1; i <= rankl; ++i) {
-      for(int j = 1; j <= rankr; ++j) {
-        for(int k = 1; k <= N; ++k) {
-          double Lelt = envi_L[core_id - 1].elt(is(core_id) = k, links(core_id - 1) = i);
-          double Relt = envi_R[core_id - 1].elt(is(core_id) = k, links(core_id) = j);
-          B.ref(core_id).set(links(core_id - 1) = i, is(core_id) = k, links(core_id) = j, Lelt * Relt);
-        }
-      }
-    }
-    B.ref(core_id) *= M[core_id - 1];
-  }
-  B.ref(this->d_) = envi_L[this->d_ - 1] * M[this->d_ - 1];
-
-  return std::make_tuple(B, envi_L, envi_R);
-}
-
-// Compute the tensor moment B and environments for the previous TT bias vb_.
-// This is the intrusive variant's contribution from the prior accumulated bias.
-// Analogous to formTensorMoment but uses vb_ in place of M*coeff: the "samples"
-// are the vb_ bond indices, and the environments are formed by contracting coeff
-// with vb_ core by core. The result has the same structure as Bemp from the new hills.
-std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorMomentVb(const MPS& coeff) {
-  // align vb_ site indices with coeff's site indices for contraction
-  for(unsigned i = 1; i <= this->d_; ++i) {
-    this->vb_.ref(i) *= delta(siteIndex(this->vb_, i), siteIndex(coeff, i));
-  }
-  std::vector<ITensor> envi_L(this->d_);
-  envi_L[1] = coeff(1) * this->vb_(1);
-  for(unsigned i = 2; i < this->d_; ++i) {
-    envi_L[i] = envi_L[i - 1] * coeff(i) * this->vb_(i);
-  }
-
-  std::vector<ITensor> envi_R(this->d_);
-  envi_R[this->d_ - 2] = coeff(this->d_) * this->vb_(this->d_);
-  for(int i = this->d_ - 3; i >= 0; --i) {
-    envi_R[i] = envi_R[i + 1] * coeff(i + 2) * this->vb_(i + 2);
-  }
-
-  MPS B(this->d_);
-  B.ref(1) = this->vb_(1) * envi_R[0];
-  for(unsigned core_id = 2; core_id < this->d_; ++core_id) {
-    B.ref(core_id) = envi_L[core_id - 1] * this->vb_(core_id) * envi_R[core_id - 1];
-  }
-  B.ref(this->d_) = envi_L[this->d_ - 1] * this->vb_(this->d_);
-
-  return std::make_tuple(B, envi_L, envi_R);
-}
-
-}
-}
-#endif // defined(__PLUMED_HAS_LIBITENSOR) && defined(__PLUMED_HAS_LIBHDF5)
+/* Copyright (c) 2026, Nils E. Strand, Siyao Yang, Yuehaw Khoo, and Aaron R. Dinner
+ *
+ * Tensor train (TT)-metadynamics.
+ * https://arxiv.org/abs/2603.13549
+ * See module.md for installation instructions.
+ */
+
+#include "TTHelper.h"
+#ifdef __PLUMED_HAS_LIBITENSOR
+#ifdef __PLUMED_HAS_LIBHDF5
+#include "bias/Bias.h"
+#include "core/ActionRegister.h"
+#include "core/ActionSet.h"
+#include "core/PlumedMain.h"
+#include "tools/Exception.h"
+#include "tools/Communicator.h"
+#include "tools/File.h"
+#include "tools/OpenMP.h"
+
+using namespace itensor;
+using namespace PLMD::bias;
+
+namespace PLMD {
+namespace ttsketch {
+
+//+PLUMEDOC BIAS TTMETAD
+/*
+Performs metadynamics with a tensor-train representation of the bias potential.
+
+This action runs metadynamics while periodically compressing the accumulated
+Gaussian hills into a low-rank tensor train (TT) using the TT-Sketch algorithm.
+The TT representation keeps memory and evaluation cost bounded throughout long
+simulations, in contrast to standard metadynamics where cost grows linearly with
+the number of deposited hills.
+
+## Examples
+
+The following input runs TT-MetaD on two distances. Hills are deposited every
+500 steps and compressed into a TT every 1e6 steps with target rank 10.
+
+```plumed
+d1: DISTANCE ATOMS=1,2
+d2: DISTANCE ATOMS=3,4
+TTMETAD ...
+  ARG=d1,d2
+  SIGMA=0.1,0.1
+  HEIGHT=1.0
+  PACE=500
+  SKETCH_PACE=1000000
+  SKETCH_RANK=10
+  SKETCH_INITRANK=50
+  INTERVAL_MIN=0.0,0.0
+  INTERVAL_MAX=3.0,3.0
+  FILE=HILLS
+  LABEL=tt
+... TTMETAD
+PRINT ARG=tt.bias FILE=COLVAR STRIDE=100
+```
+
+*/
+//+ENDPLUMEDOC
+
+class TTMetaD : public Bias {
+
+private:
+  struct Gaussian {
+    double height;
+    std::vector<double> center;
+    std::vector<double> sigma;
+    std::vector<double> invsigma;
+    Gaussian(const double h, const std::vector<double>& c, const std::vector<double>& s)
+      : height(h), center(c), sigma(s), invsigma(s) {
+      for(unsigned i = 0; i < invsigma.size(); ++i) {
+        if(abs(invsigma[i]) > 1.e-20) {
+          invsigma[i] = 1.0 / invsigma[i];
+        } else {
+          invsigma[i] = 0.0;
+        }
+      }
+    }
+  };
+  // standard metadynamics parameters
+  double kbt_;
+  int stride_;
+  bool welltemp_;
+  double biasf_;
+  std::string fmt_;
+  bool isFirstStep_;
+  double height0_;
+  std::vector<double> sigma0_;
+  std::vector<Gaussian> hills_;   // Gaussians deposited since the last TT update
+  OFile hillsOfile_;
+  std::string mw_dir_;
+  std::string hillsfname_;
+  bool walkers_mpi_;
+  int mpi_size_;
+  int mpi_rank_;
+  // TT-sketch parameters
+  unsigned d_;               // number of CV dimensions
+  int sketch_rc_;            // initial (sketch) rank for the random TT coefficient tensor
+  int sketch_r_;             // target TT bond dimension after SVD truncation (0 = use cutoff)
+  double sketch_cutoff_;     // relative SVD truncation threshold (used when sketch_r_=0)
+  int sketch_stride_;        // steps between TT bias updates (tau in the paper)
+  double sketch_alpha_;      // weight for non-constant basis functions in the sketch coefficient TT
+  std::vector<BasisFunc> sketch_basis_;  // one BasisFunc per CV dimension
+  unsigned sketch_count_;    // number of TT updates performed so far (starts at 1, increments after each update)
+  MPS vb_;                   // current TT approximation of the accumulated bias (V_bias^TT)
+  double sketch_until_;      // simulation time after which the bias is frozen (no further updates)
+  bool frozen_;              // true once the bias has been frozen (after sketch_until_)
+  bool sketch_conv_;         // true if Gaussian smoothing (conv mode) is active for basis evaluation
+  bool nonintrusive_;        // if true, uses the intrusive-free variant: accumulates Bemp directly
+  // instead of re-projecting the previous vb_ through the Gram matrix
+  bool deterministic_;       // if true, use a fixed RNG seed (42) in createTTCoeff() for reproducibility
+  MPS B_prev_;               // accumulated tensor moment from all previous sketches (nonintrusive only)
+  std::vector<ITensor> A_prev_;   // accumulated cross-gram matrices per bond (nonintrusive only)
+
+  void readGaussians(IFile *ifile);
+  void writeGaussian(const Gaussian& hill, OFile& file);
+  double getHeight(const std::vector<double>& cv);
+  double getBias(const std::vector<double>& cv);
+  double getBiasAndDerivatives(const std::vector<double>& cv, std::vector<double>& der);
+  double evaluateGaussian(const std::vector<double>& cv, const Gaussian& hill);
+  double evaluateGaussianAndDerivatives(const std::vector<double>& cv, const Gaussian& hill, std::vector<double>& der, std::vector<double>& dp);
+  bool scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height);
+  void paraSketch();
+  MPS createTTCoeff() const;
+  std::pair<std::vector<ITensor>, IndexSet> intBasisSample(const IndexSet& is) const;
+  std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> formTensorMoment(const std::vector<ITensor>& M, const MPS& coeff, const IndexSet& is);
+  std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> formTensorMomentVb(const MPS& coeff);
+
+public:
+  explicit TTMetaD(const ActionOptions&);
+  void calculate() override;
+  void update() override;
+  static void registerKeywords(Keywords& keys);
+};
+
+PLUMED_REGISTER_ACTION(TTMetaD, "TTMETAD")
+
+void TTMetaD::registerKeywords(Keywords& keys) {
+  Bias::registerKeywords(keys);
+  keys.addFlag("KERNEL_BASIS", false, "Specifies that local kernel basis should be used instead of Fourier basis");
+  keys.add("compulsory", "SIGMA", "the widths of the Gaussian hills");
+  keys.add("compulsory", "PACE", "the frequency for hill addition");
+  keys.add("compulsory", "FILE", "HILLS", "a file in which the list of added hills is stored");
+  keys.add("compulsory", "HEIGHT", "the heights of the Gaussian hills");
+  keys.add("optional", "FMT", "specify format for HILLS files (useful for decrease the number of digits in regtests)");
+  keys.add("optional", "BIASFACTOR", "use well tempered metadynamics and use this bias factor. Please note you must also specify temp");
+  keys.add("optional", "TEMP", "the system temperature - this is only needed if you are doing well-tempered metadynamics");
+  keys.addFlag("WALKERS_MPI", false, "To be used when gromacs + multiple walkers are used");
+  keys.add("optional", "WALKERS_DIR", "shared directory with the hills files from all the walkers");
+  keys.use("RESTART");
+  keys.add("optional", "SKETCH_RANK", "Target rank for TTSketch algorithm - compulsory if SKETCH_CUTOFF is not specified");
+  keys.add("optional", "SKETCH_CUTOFF", "Truncation error cutoff for singular value decomposition - compulsory if SKETCH_RANK is not specified");
+  keys.add("compulsory", "SKETCH_INITRANK", "Initial rank for TTSketch algorithm");
+  keys.add("compulsory", "SKETCH_PACE", "1e6", "The frequency for TT Vbias updates");
+  keys.add("compulsory", "INTERVAL_MIN", "Lower limits, outside the limits the system will not feel the biasing force");
+  keys.add("compulsory", "INTERVAL_MAX", "Upper limits, outside the limits the system will not feel the biasing force");
+  keys.add("compulsory", "SKETCH_NBASIS", "20", "Number of basis functions per dimension");
+  keys.add("compulsory", "SKETCH_ALPHA", "0.05", "Weight coefficient for random tensor train construction");
+  keys.add("optional", "SKETCH_UNTIL", "After this time, the bias potential freezes");
+  keys.add("optional", "SKETCH_WIDTH", "Width of Gaussian kernels for smoothing");
+  keys.add("optional", "KERNEL_DX", "Width of basis function kernels");
+  keys.addFlag("NONINTRUSIVE", false, "Sketching uses previous exact sum of Gaussians instead of TT approximation");
+  keys.addFlag("DETERMINISTIC", false, "Use a fixed random seed for TT sketch construction, ensuring reproducible results. Intended for regression testing.");
+}
+
+TTMetaD::TTMetaD(const ActionOptions& ao):
+  PLUMED_BIAS_INIT(ao),
+  kbt_(0.0),
+  stride_(0),
+  welltemp_(false),
+  biasf_(-1.0),
+  isFirstStep_(true),
+  height0_(std::numeric_limits<double>::max()),
+  mw_dir_(""),
+  walkers_mpi_(false),
+  mpi_size_(0),
+  mpi_rank_(0),
+  sketch_r_(0),
+  sketch_cutoff_(0.0),
+  sketch_count_(1),
+  sketch_until_(std::numeric_limits<double>::max()),
+  frozen_(false),
+  sketch_conv_(false),
+  nonintrusive_(false),
+  deterministic_(false) {
+  bool kernel;
+  parseFlag("KERNEL_BASIS", kernel);
+  this->d_ = getNumberOfArguments();
+  if(this->d_ < 2) {
+    error("Number of arguments must be at least 2");
+  }
+  parse("FMT", this->fmt_);
+  parseVector("SIGMA", this->sigma0_);
+  if(this->sigma0_.size() != d_) {
+    error("number of arguments does not match number of SIGMA parameters");
+  }
+  parse("HEIGHT", this->height0_);
+  parse("PACE", this->stride_);
+  if(stride_ <= 0) {
+    error("frequency for hill addition is nonsensical");
+  }
+  this->hillsfname_ = "HILLS";
+  parse("FILE", this->hillsfname_);
+  parse("BIASFACTOR", this->biasf_);
+  if(this->biasf_ < 1.0 && this->biasf_ != -1.0) {
+    error("well tempered bias factor is nonsensical");
+  }
+  this->kbt_ = getkBT();
+  if(this->biasf_ >= 1.0) {
+    if(this->kbt_ == 0.0) {
+      error("Unless the MD engine passes the temperature to plumed, with well-tempered metad you must specify it using TEMP");
+    }
+    this->welltemp_ = true;
+  }
+
+  parseFlag("WALKERS_MPI", this->walkers_mpi_);
+  parse("WALKERS_DIR", this->mw_dir_);
+  if(this->walkers_mpi_ && this->mw_dir_== "") {
+    const std::string ret = std::filesystem::current_path();
+    this->mw_dir_ = ret + "/";
+    multi_sim_comm.Bcast(this->mw_dir_, 0);
+  }
+  if(this->walkers_mpi_ && this->mw_dir_ != "") {
+    this->hillsfname_ = this->mw_dir_ + "/" + this->hillsfname_;
+  }
+
+  parse("SKETCH_RANK", this->sketch_r_);
+  parse("SKETCH_CUTOFF", this->sketch_cutoff_);
+  if(this->sketch_r_ <= 0 && (this->sketch_cutoff_ <= 0.0 || this->sketch_cutoff_ > 1.0)) {
+    error("Valid SKETCH_RANK or SKETCH_CUTOFF needs to be specified");
+  }
+  parse("SKETCH_INITRANK", this->sketch_rc_);
+  if(this->sketch_rc_ <= 0) {
+    error("SKETCH_INITRANK must be positive");
+  }
+  parse("SKETCH_PACE", this->sketch_stride_);
+  if(this->sketch_stride_ <= 0) {
+    error("SKETCH_PACE must be positive");
+  }
+  std::vector<double> interval_min;
+  parseVector("INTERVAL_MIN", interval_min);
+  if(interval_min.size() != this->d_) {
+    error("Number of arguments does not match number of INTERVAL_MIN parameters");
+  }
+  std::vector<double> interval_max;
+  parseVector("INTERVAL_MAX", interval_max);
+  if(interval_max.size() != this->d_) {
+    error("Number of arguments does not match number of INTERVAL_MAX parameters");
+  }
+  int nbasis = 20;
+  parse("SKETCH_NBASIS", nbasis);
+  if(nbasis <= 1) {
+    error("SKETCH_NBASIS must be greater than 1");
+  }
+  if(!kernel && nbasis % 2 == 0) {
+    ++nbasis;
+  }
+  parse("SKETCH_ALPHA", this->sketch_alpha_);
+  if(this->sketch_alpha_ <= 0.0 || this->sketch_alpha_ > 1.0) {
+    error("SKETCH_ALPHA must be positive and no greater than 1");
+  }
+  std::vector<double> w;
+  parseVector("SKETCH_WIDTH", w);
+  if(w.size() == 0) {
+    w.assign(this->d_, 0.0);
+  }
+  if(w.size() != this->d_) {
+    error("Number of arguments does not match number of SKETCH_WIDTH parameters");
+  }
+  for (double val : w) {
+    if (val != 0.0) {
+      this->sketch_conv_ = true;
+    }
+  }
+  std::vector<double> dx;
+  parseVector("KERNEL_DX", dx);
+  if(dx.size() == 0) {
+    dx.resize(this->d_, 0.0);
+  }
+  if(dx.size() != this->d_) {
+    error("Number of arguments does not match number of KERNEL_DX parameters");
+  }
+  for(unsigned i = 0; i < this->d_; ++i) {
+    if(this->sketch_conv_ && w[i] <= 0.0) {
+      error("Gaussian smoothing requires positive WIDTH");
+    }
+    if(kernel && dx[i] < 0.0) {
+      error("Kernel basis requires positive KERNEL_DX");
+    }
+    if(interval_max[i] <= interval_min[i]) {
+      error("INTERVAL_MAX parameters need to be greater than respective INTERVAL_MIN parameters");
+    }
+    this->sketch_basis_.push_back(BasisFunc(std::make_pair(interval_min[i], interval_max[i]), nbasis, w[i], kernel, dx[i]));
+  }
+  if(kernel && this->sketch_conv_) {
+    error("kernel smoothing incompatible with kernel basis");
+  }
+  if(this->walkers_mpi_) {
+    this->mpi_size_ = multi_sim_comm.Get_size();
+    this->mpi_rank_ = multi_sim_comm.Get_rank();
+  }
+
+  parse("SKETCH_UNTIL", this->sketch_until_);
+
+  parseFlag("NONINTRUSIVE", this->nonintrusive_);
+  parseFlag("DETERMINISTIC", this->deterministic_);
+
+  if(getRestart()) {
+    std::string ttfilename = "ttsketch.h5";
+    if(this->walkers_mpi_) {
+      ttfilename = "../" + ttfilename;
+    }
+    while(true) {
+      try {
+        this->vb_ = ttRead(ttfilename, ++this->sketch_count_);
+      } catch(...) {
+        --this->sketch_count_;
+        break;
+      }
+    }
+    if(this->sketch_count_ == 1) {
+      this->vb_ = MPS();
+    }
+    if(getTime() >= this->sketch_until_) {
+      this->frozen_ = true;
+    } else {
+      IFile hills_ifile;
+      if(!hills_ifile.FileExist(this->hillsfname_)) {
+        error("The hills file cannot be found");
+      }
+      hills_ifile.open(this->hillsfname_);
+      readGaussians(&hills_ifile);
+      hills_ifile.close();
+    }
+  }
+
+  if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+    this->hillsOfile_.link(*this);
+    this->hillsOfile_.enforceSuffix("");
+    this->hillsOfile_.open(this->hillsfname_);
+    if(this->fmt_.length() > 0) {
+      this->hillsOfile_.fmtField(this->fmt_);
+    }
+    hillsOfile_.setHeavyFlush();
+    for(unsigned i = 0; i < this->d_; ++i) {
+      hillsOfile_.setupPrintValue(getPntrToArgument(i));
+    }
+  }
+}
+
+void TTMetaD::readGaussians(IFile *ifile) {
+  std::vector<double> center(this->d_);
+  std::vector<double> sigma(this->d_);
+  double height;
+  int nhills = 0;
+
+  std::vector<Value> tmpvalues;
+  for(unsigned j = 0; j < this->d_; ++j) {
+    tmpvalues.push_back(Value(this, getPntrToArgument(j)->getName(), false));
+  }
+
+  while(scanOneHill(ifile, tmpvalues, center, sigma, height)) {
+    ++nhills;
+    if(this->welltemp_ && this->biasf_ > 1.0) {
+      height *= (this->biasf_ - 1.0) / this->biasf_;
+    }
+    this->hills_.push_back(Gaussian(height, center, sigma));
+  }
+  log << "  restarting from step " << this->sketch_count_ << "\n";
+  log << "  " << nhills << " hills retrieved\n";
+
+  log << "  Bibliography " << plumed.cite("Strand, Yang, Khoo, and Dinner, https://arxiv.org/abs/2603.13549 (2026)");
+  log << plumed.cite("Fishman, White, and Stoudenmire, https://arxiv.org/abs/2007.14822 (2020)");
+  log << "\n";
+}
+
+bool TTMetaD::scanOneHill(IFile* ifile, std::vector<Value>& tmpvalues, std::vector<double>& center, std::vector<double>& sigma, double& height) {
+  double dummy;
+  if(ifile->scanField("time", dummy)) {
+    unsigned ncv = tmpvalues.size();
+    for(unsigned i = 0; i < ncv; ++i) {
+      ifile->scanField(&tmpvalues[i]);
+      if(tmpvalues[i].isPeriodic() && !getPntrToArgument(i)->isPeriodic()) {
+        error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
+      } else if(tmpvalues[i].isPeriodic()) {
+        std::string imin, imax;
+        tmpvalues[i].getDomain(imin, imax);
+        std::string rmin, rmax;
+        getPntrToArgument(i)->getDomain(rmin, rmax);
+        if(imin != rmin || imax != rmax) {
+          error("in hills file periodicity for variable " + tmpvalues[i].getName() + " does not match periodicity in input");
+        }
+      }
+      center[i] = tmpvalues[i].get();
+    }
+    for(unsigned i = 0; i < ncv; ++i) {
+      ifile->scanField("sigma_" + getPntrToArgument(i)->getName(), sigma[i]);
+    }
+
+    ifile->scanField("height", height);
+    ifile->scanField("biasf", dummy);
+    ifile->scanField();
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void TTMetaD::writeGaussian(const Gaussian& hill, OFile&file) {
+  file.printField("time", getTimeStep() * getStep());
+  for(unsigned i = 0; i < this->d_; ++i) {
+    file.printField(getPntrToArgument(i), hill.center[i]);
+  }
+  for(unsigned i = 0; i < this->d_; ++i) {
+    file.printField("sigma_" + getPntrToArgument(i)->getName(), hill.sigma[i]);
+  }
+  double height = hill.height;
+  if(this->welltemp_ && this->biasf_ > 1.0) {
+    height *= this->biasf_ / (this->biasf_ - 1.0);
+  }
+  file.printField("height", height).printField("biasf", this->biasf_);
+  file.printField();
+}
+
+// Called every MD step: evaluate total bias (TT + remaining Gaussians) and set forces.
+void TTMetaD::calculate() {
+  std::vector<double> cv(this->d_);
+  for(unsigned i = 0; i < this->d_; ++i) {
+    cv[i] = getArgument(i);
+  }
+
+  std::vector<double> der(this->d_, 0.0);
+
+  double ene = getBiasAndDerivatives(cv, der);
+  setBias(ene);
+  for(unsigned i = 0; i < this->d_; ++i) {
+    setOutputForce(i, -der[i]);
+  }
+}
+
+// Called every MD step after forces are applied.
+// Two separate triggers:
+//   1. Every sketch_stride_ steps: run paraSketch() to compress accumulated hills into vb_,
+//      clear the hills list, and rewind the HILLS file for the next interval.
+//   2. Every stride_ steps: deposit a new Gaussian hill and write it to the HILLS file.
+void TTMetaD::update() {
+  bool nowAddATT;
+  if(getStep() % this->sketch_stride_ == 0 && !this->isFirstStep_ && !this->frozen_) {
+    nowAddATT = true;
+    if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+      this->hillsOfile_.flush();
+    }
+  } else {
+    nowAddATT = false;
+  }
+
+  if(nowAddATT) {
+    if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+      unsigned N = this->hills_.size();
+      log << "Sample limits\n";
+      for(unsigned i = 0; i < this->d_; ++i) {
+        auto [large, small] = this->sketch_basis_[i].dom();
+        for(unsigned j = 0; j < N; ++j) {
+          if(this->hills_[j].center[i] > large) {
+            large = this->hills_[j].center[i];
+          }
+          if(this->hills_[j].center[i] < small) {
+            small = this->hills_[j].center[i];
+          }
+        }
+        log << small << " " << large << "\n";
+      }
+
+      log << "\nEmpirical means:\n";
+      Matrix<double> sigmahat(this->d_, this->d_);
+      std::vector<double> muhat(this->d_, 0.0);
+      for(unsigned k = 0; k < this->d_; ++k) {
+        for(unsigned j = 0; j < N; ++j) {
+          muhat[k] += this->hills_[j].center[k] / N;
+        }
+        log << muhat[k] << " ";
+      }
+      log << "\nEmpirical covariance matrix:\n";
+      for(unsigned k = 0; k < this->d_; ++k) {
+        for(unsigned l = k; l < this->d_; ++l) {
+          sigmahat(k, l) = sigmahat(l, k) = 0.0;
+          for(unsigned j = 0; j < N; ++j) {
+            sigmahat(k, l) += (this->hills_[j].center[k] - muhat[k]) * (this->hills_[j].center[l] - muhat[l]) / (N - 1);
+          }
+          sigmahat(l, k) = sigmahat(k, l);
+        }
+      }
+      matrixOut(log, sigmahat);
+
+      // record current bias at hill centers before update, for computing relative error after
+      std::vector<double> A0(N);
+      Matrix<double> x(N, this->d_);
+      for(unsigned i = 0; i < N; ++i) {
+        for(unsigned j = 0; j < this->d_; ++j) {
+          x(i, j) = this->hills_[i].center[j];
+        }
+        A0[i] = getBias(this->hills_[i].center);
+      }
+
+      log << "\nStarting TT-sketch...\n";
+      log.flush();
+      paraSketch();
+      ++this->sketch_count_;
+
+      this->hills_.clear();
+
+      // compute relative L2 approximation error at hill centers: ||V_new - V_old|| / ||V_old||
+      std::vector<double> diff(N);
+      for(unsigned i = 0; i < N; ++i) {
+        std::vector<double> xi(this->d_);
+        for(unsigned j = 0; j < this->d_; ++j) {
+          xi[j] = x(i, j);
+        }
+        diff[i] = getBias(xi);
+      }
+      std::transform(diff.begin(), diff.end(), A0.begin(), diff.begin(), std::minus<double>());
+      log << "Relative l2 error = " << sqrt(norm(diff) / norm(A0)) << "\n\n";
+      log.flush();
+
+      std::string ttfilename = "ttsketch.h5";
+      if(this->walkers_mpi_) {
+        ttfilename = "../" + ttfilename;
+      }
+      ttWrite(ttfilename, this->vb_, this->sketch_count_);
+    }
+
+    if(this->walkers_mpi_) {
+      multi_sim_comm.Bcast(this->sketch_count_, 0);
+      if(this->mpi_rank_ != 0) {
+        this->hills_.clear();
+        this->vb_ = ttRead("../ttsketch.h5", this->sketch_count_);
+      }
+    }
+    if(getTime() >= this->sketch_until_) {
+      this->frozen_ = true;
+    } else if(!this->walkers_mpi_ || this->mpi_rank_ == 0) {
+      this->hillsOfile_.rewind();
+      this->hillsOfile_.clearFields();
+      if(this->fmt_.length() > 0) {
+        this->hillsOfile_.fmtField(this->fmt_);
+      }
+      hillsOfile_.setHeavyFlush();
+      for(unsigned i = 0; i < this->d_; ++i) {
+        hillsOfile_.setupPrintValue(getPntrToArgument(i));
+      }
+    }
+  }
+
+  bool nowAddAHill;
+  if(getStep() % this->stride_ == 0 && !isFirstStep_ && !this->frozen_) {
+    nowAddAHill = true;
+  } else {
+    nowAddAHill = false;
+    this->isFirstStep_ = false;
+  }
+
+  std::vector<double> cv(this->d_);
+  for(unsigned i = 0; i < this->d_; ++i) {
+    cv[i] = getArgument(i);
+  }
+
+  if(nowAddAHill) {
+    double height = getHeight(cv);
+
+    if(this->walkers_mpi_) {
+      std::vector<double> all_cv(this->mpi_size_ * this->d_, 0.0);
+      std::vector<double> all_sigma(this->mpi_size_ * this->sigma0_.size(), 0.0);
+      std::vector<double> all_height(this->mpi_size_, 0.0);
+      multi_sim_comm.Allgather(cv, all_cv);
+      multi_sim_comm.Allgather(this->sigma0_, all_sigma);
+      multi_sim_comm.Allgather(height * (this->biasf_ > 1.0 ? this->biasf_ / (this->biasf_ - 1.0) : 1.0), all_height);
+
+      for(int i = 0; i < this->mpi_size_; i++) {
+        std::vector<double> cv_now(this->d_);
+        std::vector<double> sigma_now(this->sigma0_.size());
+        for(unsigned j = 0; j < this->d_; j++) {
+          cv_now[j] = all_cv[i * this->d_ + j];
+        }
+        for(unsigned j = 0; j < this->sigma0_.size(); j++) {
+          sigma_now[j] = all_sigma[i * this->sigma0_.size() + j];
+        }
+        double fact = (this->biasf_ > 1.0 ? (this->biasf_ - 1.0) / this->biasf_ : 1.0);
+        Gaussian newhill(all_height[i] * fact, cv_now, sigma_now);
+        this->hills_.push_back(newhill);
+        if(this->mpi_rank_ == 0) {
+          writeGaussian(newhill, hillsOfile_);
+        }
+      }
+    } else {
+      Gaussian newhill(height, cv, this->sigma0_);
+      this->hills_.push_back(newhill);
+      writeGaussian(newhill, hillsOfile_);
+    }
+  }
+
+  if(getStep() % this->sketch_stride_ == 1 && !this->frozen_) {
+    log << "Vbias update " << this->sketch_count_ << "...\n\n";
+    log.flush();
+  }
+}
+
+double TTMetaD::getHeight(const std::vector<double>& cv) {
+  double height = this->height0_;
+  if(this->welltemp_) {
+    double vbias = getBias(cv);
+    if(this->biasf_ > 1.0) {
+      height = this->height0_ * exp(-vbias / (this->kbt_ * (this->biasf_ - 1.0)));
+    } else {
+      height = this->height0_ * exp(-vbias / this->kbt_);
+    }
+  }
+  return height;
+}
+
+// Total bias = TT approximation of accumulated hills + sum of recently deposited Gaussians.
+double TTMetaD::getBias(const std::vector<double>& cv) {
+  double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
+  unsigned nt = OpenMP::getNumThreads();
+  #pragma omp parallel num_threads(nt)
+  {
+    #pragma omp for reduction(+:bias) nowait
+    for(unsigned i = 0; i < hills_.size(); ++i) {
+      bias += evaluateGaussian(cv, this->hills_[i]);
+    }
+  }
+  return bias;
+}
+
+double TTMetaD::getBiasAndDerivatives(const std::vector<double>& cv, std::vector<double>& der) {
+  double bias = length(this->vb_) == 0 ? 0.0 : ttEval(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
+  if(length(this->vb_) != 0) {
+    der = ttGrad(this->vb_, this->sketch_basis_, cv, this->sketch_conv_);
+  }
+  unsigned nt = OpenMP::getNumThreads();
+  if(this->hills_.size() < 2 * nt || nt == 1) {
+    std::vector<double> dp(this->d_);
+    for(unsigned i = 0; i < this->hills_.size(); ++i) {
+      bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], der, dp);
+    }
+  } else {
+    #pragma omp parallel num_threads(nt)
+    {
+      std::vector<double> omp_deriv(this->d_, 0.0);
+      std::vector<double> dp(this->d_);
+      #pragma omp for reduction(+:bias) nowait
+      for(unsigned i = 0; i < this->hills_.size(); ++i) {
+        bias += evaluateGaussianAndDerivatives(cv, this->hills_[i], omp_deriv, dp);
+      }
+      #pragma omp critical
+      for(unsigned i = 0; i < this->d_; ++i) {
+        der[i] += omp_deriv[i];
+      }
+    }
+  }
+  return bias;
+}
+
+double TTMetaD::evaluateGaussian(const std::vector<double>& cv, const Gaussian& hill) {
+  double dp2 = 0.0;
+  for(unsigned i = 0; i < this->d_; i++) {
+    double dp = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
+    dp2 += dp * dp;
+  }
+  dp2 *= 0.5;
+
+  double bias = 0.0;
+  if(dp2 < dp2cutoff) {
+    bias = hill.height * exp(-dp2);
+  }
+
+  return bias;
+}
+
+double TTMetaD::evaluateGaussianAndDerivatives(const std::vector<double>& cv, const Gaussian& hill, std::vector<double>& der, std::vector<double>& dp) {
+  double dp2 = 0.0;
+  double bias = 0.0;
+  for(unsigned i = 0; i < this->d_; i++) {
+    dp[i] = difference(i, hill.center[i], cv[i]) * hill.invsigma[i];
+    dp2 += dp[i] * dp[i];
+  }
+  dp2 *= 0.5;
+  if(dp2 < dp2cutoff) {
+    bias = hill.height * exp(-dp2);
+    for(unsigned i = 0; i < this->d_; i++) {
+      der[i] -= bias * dp[i] * hill.invsigma[i];
+    }
+  }
+
+  return bias;
+}
+
+// TT-Sketch algorithm: compresses the N accumulated Gaussian hills plus the previous
+// TT bias vb_ into a new low-rank TT approximation G, which becomes the updated vb_.
+//
+// Overview (see paper Algorithm 2):
+//   1. createTTCoeff(): build a random sketch coefficient TT `coeff` of rank sketch_rc_.
+//   2. intBasisSample(): compute M[i][j,k] = <phi_k, g_j>_{dim i}, the inner product of
+//      each basis function with each Gaussian along dimension i.
+//   3. formTensorMoment(): contract coeff with M to get the tensor moment Bemp and
+//      left/right environment tensors envi_L, envi_R needed to form the normal equations.
+//   4. For each bond between cores k and k+1:
+//      - Form the cross-gram matrix A from envi_L and envi_R.
+//      - SVD A to get the rank-trimmed projection V (truncating to sketch_r_ or sketch_cutoff_).
+//   5. Recover TT cores G from Bemp and the pseudo-inverse of U*S.
+//   6. Apply Gram matrix correction (kernel basis only) to convert to dual basis coefficients.
+void TTMetaD::paraSketch() {
+  unsigned N = this->hills_.size();
+  auto coeff = createTTCoeff();
+  auto [M, is] = intBasisSample(siteInds(coeff));
+  MPS G(this->d_);
+
+  auto [Bemp, envi_L, envi_R] = formTensorMoment(M, coeff, is);
+  MPS Bemp_Vb;
+  std::vector<ITensor> envi_L_Vb;
+  std::vector<ITensor> envi_R_Vb;
+  if(this->sketch_count_ != 1) {
+    if(this->nonintrusive_) {
+      // Non-intrusive variant: accumulate tensor moments directly across sketches.
+      // B_prev_ stores the sum of Bemp from all prior intervals; re-index its bond/site
+      // indices to match the current coeff TT's indices before adding.
+      auto sites = siteInds(Bemp);
+      auto sites_prev = siteInds(this->B_prev_);
+      auto links = linkInds(Bemp);
+      auto links_prev = linkInds(this->B_prev_);
+      for(unsigned i = 1; i <= this->d_; ++i) {
+        this->B_prev_.ref(i) *= delta(sites(i), sites_prev(i));
+        if(i != 1) {
+          this->B_prev_.ref(i) *= delta(links(i - 1), links_prev(i - 1));
+        }
+        if (i != this->d_) {
+          this->B_prev_.ref(i) *= delta(links(i), links_prev(i));
+        }
+        Bemp.ref(i) += this->B_prev_(i);
+      }
+    } else {
+      // Intrusive variant: project the previous TT bias vb_ onto the current sketch's
+      // basis by pre-multiplying each core by the Gram matrix (kernel basis) or the
+      // convolution inner-product matrix (Fourier conv mode), then add its tensor moment.
+      if(this->sketch_basis_[0].kernel()) {
+        // Convert vb_ from dual basis to standard basis by applying Gram matrix per core
+        for(unsigned i = 1; i <= this->d_; ++i) {
+          auto s = siteIndex(this->vb_, i);
+          ITensor gram(s, prime(s));
+          for(int j = 1; j <= dim(s); ++j) {
+            for(int l = 1; l <= dim(s); ++l) {
+              gram.set(s = j, prime(s) = l, this->sketch_basis_[i - 1].gram()(j - 1, l - 1));
+            }
+          }
+          this->vb_.ref(i) *= gram;
+          this->vb_.ref(i).noPrime();
+        }
+      }
+      if(this->sketch_conv_) {
+        // Apply Fourier convolution damping factors to vb_ to account for basis smoothing
+        for(unsigned i = 1; i <= this->d_; ++i) {
+          double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
+          double w = this->sketch_basis_[i - 1].w();
+          auto s = siteIndex(this->vb_, i);
+          ITensor inner(s, prime(s));
+          for(int j = 1; j < dim(s); ++j) {
+            inner.set(s = j, prime(s) = j, exp(-pow(M_PI * w * (j / 2), 2) / (2 * pow(L, 2))));
+          }
+          this->vb_.ref(i) *= inner;
+          this->vb_.ref(i).noPrime();
+        }
+      }
+      auto vbresult = formTensorMomentVb(coeff);
+      Bemp_Vb = std::get<0>(vbresult);
+      envi_L_Vb = std::get<1>(vbresult);
+      envi_R_Vb = std::get<2>(vbresult);
+      for(unsigned i = 1; i <= this->d_; ++i) {
+        Bemp.ref(i) += Bemp_Vb(i);
+      }
+    }
+  }
+  if(this->nonintrusive_) {
+    this->B_prev_ = Bemp;
+    if(this->sketch_count_ == 1) {
+      this->A_prev_.resize(this->d_ - 1);
+    }
+  }
+  // For each bond between cores k-1 and k, form the cross-gram matrix A = L^T * R,
+  // where L = envi_L[k] and R = envi_R[k-2] are the left/right environment projections
+  // of coeff onto the sample indices. SVD A to find the rank-trimmed subspace V.
+  auto links = linkInds(coeff);
+  std::vector<ITensor> U(this->d_), S(this->d_), V(this->d_);
+  std::vector<Index> links_trimmed;
+  for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
+    int rank = dim(links(core_id - 1));
+    Matrix<double> LMat(N, rank), RMat(N, rank);
+    for(unsigned i = 1; i <= N; ++i) {
+      for(int j = 1; j <= rank; ++j) {
+        LMat(i - 1, j - 1) = envi_L[core_id - 1].elt(is(core_id) = i, links(core_id - 1) = j);
+        RMat(i - 1, j - 1) = envi_R[core_id - 2].elt(is(core_id - 1) = i, links(core_id - 1) = j);
+      }
+    }
+    Matrix<double> Lt, AMat, PMat, AMat_Vb;
+    transpose(LMat, Lt);
+    mult(Lt, RMat, AMat);
+
+    if(this->sketch_count_ != 1 && !this->nonintrusive_) {
+      auto ivb = linkIndex(this->vb_, core_id - 1);
+      int rank_vb = dim(ivb);
+      LMat = Matrix<double>(rank_vb, rank);
+      RMat = Matrix<double>(rank_vb, rank);
+      for(int i = 1; i <= rank_vb; ++i) {
+        for(int j = 1; j <= rank; ++j) {
+          LMat(i - 1, j - 1) = envi_L_Vb[core_id - 1].elt(ivb = i, links(core_id - 1) = j);
+          RMat(i - 1, j - 1) = envi_R_Vb[core_id - 2].elt(ivb = i, links(core_id - 1) = j);
+        }
+      }
+      transpose(LMat, Lt);
+      mult(Lt, RMat, AMat_Vb);
+    }
+
+    ITensor A(prime(links(core_id - 1)), links(core_id - 1));
+    for(int i = 1; i <= rank; ++i) {
+      for(int j = 1; j <= rank; ++j) {
+        A.set(prime(links(core_id - 1)) = i, links(core_id - 1) = j, AMat(i - 1, j - 1));
+      }
+    }
+    if(this->sketch_count_ != 1) {
+      if(this->nonintrusive_) {
+        this->A_prev_[core_id - 2] *= delta(this->A_prev_[core_id - 2].index(1), A.index(1));
+        this->A_prev_[core_id - 2] *= delta(this->A_prev_[core_id - 2].index(2), A.index(2));
+        A += this->A_prev_[core_id - 2];
+      } else {
+        ITensor A_Vb(prime(links(core_id - 1)), links(core_id - 1));
+        for(int i = 1; i <= rank; ++i) {
+          for(int j = 1; j <= rank; ++j) {
+            A_Vb.set(prime(links(core_id - 1)) = i, links(core_id - 1) = j, AMat_Vb(i - 1, j - 1));
+          }
+        }
+        A += A_Vb;
+      }
+    }
+    if(this->nonintrusive_) {
+      this->A_prev_[core_id - 2] = A;
+    }
+    auto original_link_tags = tags(links(core_id - 1));
+    V[core_id - 1] = ITensor(links(core_id - 1));
+    if(this->sketch_r_ > 0) {
+      svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1],
+      {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags, "MaxDim=", this->sketch_r_});
+    } else {
+      svd(A, U[core_id - 1], S[core_id - 1], V[core_id - 1], {"Cutoff=", this->sketch_cutoff_, "RightTags=", original_link_tags});
+    }
+    links_trimmed.push_back(commonIndex(S[core_id - 1], V[core_id - 1]));
+  }
+
+  // Recover TT cores G from Bemp and the trimmed subspaces.
+  // G[1] = Bemp[1] * V[1] (project first core onto rank-trimmed subspace)
+  // G[k] = pinv(U[k]*S[k]) * Bemp[k] * V[k]  for 2 <= k < d
+  // G[d] = pinv(U[d]*S[d]) * Bemp[d]
+  G.ref(1) = Bemp(1) * V[1];
+  for(unsigned core_id = 2; core_id <= this->d_; ++core_id) {
+    int rank = dim(links(core_id - 1)), rank_trimmed = dim(links_trimmed[core_id - 2]);
+    ITensor A = U[core_id - 1] * S[core_id - 1];
+    ITensor Pinv(links_trimmed[core_id - 2], links(core_id - 1));
+    Matrix<double> AMat(rank, rank_trimmed), PMat;
+    for(int i = 1; i <= rank; ++i) {
+      for(int j = 1; j <= rank_trimmed; ++j) {
+        AMat(i - 1, j - 1) = A.elt(prime(links(core_id - 1)) = i, links_trimmed[core_id - 2] = j);
+      }
+    }
+    pseudoInvert(AMat, PMat);
+
+    for(int i = 1; i <= rank_trimmed; ++i) {
+      for(int j = 1; j <= rank; ++j) {
+        Pinv.set(links_trimmed[core_id - 2] = i, links(core_id - 1) = j, PMat(i - 1, j - 1));
+      }
+    }
+    G.ref(core_id) = Pinv * Bemp(core_id);
+    if(core_id != this->d_) {
+      G.ref(core_id) *= V[core_id];
+    }
+  }
+
+  log << "Final ranks ";
+  for(unsigned i = 1; i < this->d_; ++i) {
+    log << dim(linkIndex(G, i)) << " ";
+  }
+  log << "\n";
+  log.flush();
+
+  if(this->sketch_basis_[0].kernel()) {
+    // Apply pseudo-inverse of the Gram matrix to each core to convert from the
+    // primal Gram representation back to dual basis coefficients (G^+ * G_core).
+    for(unsigned i = 1; i <= this->d_; ++i) {
+      auto s = siteIndex(G, i);
+      ITensor ginv(s, prime(s));
+      for(int j = 1; j <= dim(s); ++j) {
+        for(int l = 1; l <= dim(s); ++l) {
+          ginv.set(s = j, prime(s) = l, this->sketch_basis_[i - 1].ginv()(j - 1, l - 1));
+        }
+      }
+      G.ref(i) *= ginv;
+      G.ref(i).noPrime();
+    }
+  }
+
+  this->vb_ = G;
+}
+
+// Build the random sketch coefficient TT `coeff` of bond dimension sketch_rc_.
+// Each element is drawn from N(0,1); then each core is multiplied by a diagonal
+// matrix diag(1, alpha, alpha, ...) so the constant basis function (pos=1) has
+// full weight while all other harmonics are scaled down by sketch_alpha_. This
+// ensures the sketch captures the large constant component of the bias accurately.
+MPS TTMetaD::createTTCoeff() const {
+  std::default_random_engine generator(this->deterministic_ ? 42u : static_cast<unsigned int>(time(nullptr)));
+  std::normal_distribution<double> distribution(0.0, 1.0);
+  int n = this->sketch_basis_[0].nbasis();
+  auto sites = SiteSet(this->d_, n);
+  auto coeff = MPS(sites, this->sketch_rc_);
+  for(int j = 1; j <= n; ++j) {
+    for(int k = 1; k <= this->sketch_rc_; ++k) {
+      coeff.ref(1).set(sites(1) = j, linkIndex(coeff, 1) = k, distribution(generator));
+    }
+  }
+  for(unsigned i = 2; i <= this->d_ - 1; ++i) {
+    for(int j = 1; j <= n; ++j) {
+      for(int k = 1; k <= this->sketch_rc_; ++k) {
+        for(int l = 1; l <= this->sketch_rc_; ++l) {
+          coeff.ref(i).set(sites(i) = j, linkIndex(coeff, i - 1) = k, linkIndex(coeff, i) = l, distribution(generator));
+        }
+      }
+    }
+  }
+  for(int j = 1; j <= n; ++j) {
+    for(int k = 1; k <= this->sketch_rc_; ++k) {
+      coeff.ref(this->d_).set(sites(this->d_) = j, linkIndex(coeff, this->d_ - 1) = k, distribution(generator));
+    }
+  }
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    auto s = sites(i);
+    auto sp = prime(s);
+    std::vector<double> Avec(n, this->sketch_alpha_);
+    Avec[0] = 1.0;
+    auto A = diagITensor(Avec, s, sp);
+    coeff.ref(i) *= A;
+    coeff.ref(i).noPrime();
+  }
+  return coeff;
+}
+
+// Compute the matrix M[dim i][sample j, basis k] = <phi_k, g_j>_{dim i}, the
+// analytical inner product of each 1D basis function with the 1D marginal of each
+// Gaussian hill along dimension i. The d-dimensional Gaussian factorizes as a
+// product of d 1D Gaussians, so we distribute height as h^(1/d) per dimension.
+// These integrals are computed in closed form:
+//   - Kernel basis: convolution of the kernel function with the 1D Gaussian hill
+//   - Fourier basis: Fourier transform of the 1D Gaussian (exponential decay * harmonic)
+// Returns M as a vector of ITensors (one per dimension) and a new IndexSet is_new
+// where is_new(i) indexes the N sample points for dimension i.
+std::pair<std::vector<ITensor>, IndexSet> TTMetaD::intBasisSample(const IndexSet& is) const {
+  unsigned N = this->hills_.size();
+  int nb = this->sketch_basis_[0].nbasis();
+  auto sites_new = SiteSet(this->d_, N);
+  std::vector<ITensor> M;
+  std::vector<Index> is_new;
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    double L = (this->sketch_basis_[i - 1].dom().second - this->sketch_basis_[i - 1].dom().first) / 2;
+    double a = (this->sketch_basis_[i - 1].dom().second + this->sketch_basis_[i - 1].dom().first) / 2;
+    M.push_back(ITensor(sites_new(i), is(i)));
+    is_new.push_back(sites_new(i));
+    for(unsigned j = 1; j <= N; ++j) {
+      double x = this->hills_[j - 1].center[i - 1];
+      double w = this->hills_[j - 1].sigma[i - 1];
+      double h = pow(this->hills_[j - 1].height, 1.0 / this->d_);  // per-dimension d-th root of height
+      for(int pos = 1; pos <= nb; ++pos) {
+        double result = 0.0;
+        if(this->sketch_basis_[0].kernel()) {
+          if(pos == 1) {
+            // <1, g_j> = integral of 1D Gaussian = sqrt(2*pi)*sigma
+            result = h * sqrt(2 * M_PI) * w;
+          } else {
+            // <kernel_c, g_j> = conv of two Gaussians (widths dx and w), with periodic images
+            double c = this->sketch_basis_[i - 1].center(pos - 1);
+            double dx = this->sketch_basis_[i - 1].dx();
+            for(int k = -1; k <= 1; ++k) {
+              result += exp(-pow(x - c + 2 * k * L, 2) / (2 * (pow(dx, 2) + pow(w, 2)))) * h * sqrt(2 * M_PI) * w /
+                        (sqrt(1 / pow(dx, 2) + 1 / pow(w, 2)) * w);
+            }
+          }
+        } else {
+          if(pos == 1) {
+            // <1/sqrt(2L), g_j> = sqrt(pi/L) * sigma (Gaussian times constant)
+            result = h * sqrt(M_PI / L) * w;
+          } else if(pos % 2 == 0) {
+            // <cos harmonic, g_j>: Fourier transform of Gaussian times exponential damping
+            result = exp(-pow(M_PI * w * (pos / 2), 2) / (2 * pow(L, 2))) * h * sqrt(2 * M_PI / L) * w * cos(M_PI * (x - a) * (pos / 2) / L);
+          } else {
+            result = exp(-pow(M_PI * w * (pos / 2), 2) / (2 * pow(L, 2))) * h * sqrt(2 * M_PI / L) * w * sin(M_PI * (x - a) * (pos / 2) / L);
+          }
+        }
+        M.back().set(sites_new(i) = j, is(i) = pos, result);
+      }
+    }
+  }
+  return std::make_pair(M, IndexSet(is_new));
+}
+
+// Compute the tensor moment B and environment tensors for the new Gaussian hills.
+//
+// First, L = coeff with each core's basis index replaced by the sample index via M:
+//   L[i] = coeff[i] * M[i], so L has (sample_index, bond_left, bond_right) indices.
+//
+// envi_L[i] = partial contraction of L(1)...L(i) summed over sample indices 1..N,
+//   yielding shape (sample_index_{i+1}, link_i). Used to form the left half of A.
+// envi_R[i] = partial contraction of L(i+2)...L(d) summed over sample indices,
+//   yielding shape (sample_index_{i+1}, link_{i+1}). Used to form the right half of A.
+//
+// B[k] = sum_j envi_L[k-1][j, :] * envi_R[k-1][j, :] * M[k][:, basis_k],
+//   which is the "tensor moment" for core k: the sketch of the Gaussian sum projected
+//   onto the basis functions of dimension k and the sketch subspaces of all other dims.
+std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorMoment(const std::vector<ITensor>& M, const MPS& coeff, const IndexSet& is) {
+  int N = dim(is(1));
+  auto links = linkInds(coeff);
+  // L[i] = coeff[i] with basis index contracted against M[i] -> sample index
+  auto L = coeff;
+
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    L.ref(i) *= M[i - 1];
+  }
+
+  // Build left environments: envi_L[i] accumulates the contraction of L(1)..L(i)
+  // over the shared sample index, leaving the next sample index and link free.
+  std::vector<ITensor> envi_L(this->d_);
+  envi_L[1] = L(1) * delta(is(1), is(2));
+  for(unsigned i = 2; i < this->d_; ++i) {
+    int rankl = dim(links(i - 1));
+    int rankr = dim(links(i));
+    envi_L[i] = ITensor(is(i + 1), links(i));
+    for(int j = 1; j <= N; ++j) {
+      for(int k = 1; k <= rankr; ++k) {
+        ITensor LHS(links(i - 1)), RHS(links(i - 1));
+        for(int ii = 1; ii <= rankl; ++ii) {
+          LHS.set(links(i - 1) = ii, envi_L[i - 1].elt(is(i) = j, links(i - 1) = ii));
+          RHS.set(links(i - 1) = ii, L(i).elt(links(i - 1) = ii, is(i) = j, links(i) = k));
+        }
+        envi_L[i].set(is(i + 1) = j, links(i) = k, elt(LHS * RHS));
+      }
+    }
+  }
+
+  // Build right environments: envi_R[i] accumulates L(i+2)..L(d) right-to-left.
+  std::vector<ITensor> envi_R(this->d_);
+  envi_R[this->d_ - 2] = L(this->d_) * delta(is(this->d_), is(this->d_ - 1));
+  for(int i = this->d_ - 3; i >= 0; --i) {
+    int rankl = dim(links(i + 1));
+    int rankr = dim(links(i + 2));
+    envi_R[i] = ITensor(is(i + 1), links(i + 1));
+    for(int j = 1; j <= N; ++j) {
+      for(int k = 1; k <= rankl; ++k) {
+        ITensor LHS(links(i + 2)), RHS(links(i + 2));
+        for(int ii = 1; ii <= rankr; ++ii) {
+          LHS.set(links(i + 2) = ii, envi_R[i + 1].elt(is(i + 2) = j, links(i + 2) = ii));
+          RHS.set(links(i + 2) = ii, L(i + 2).elt(links(i + 2) = ii, is(i + 2) = j, links(i + 1) = k));
+        }
+        envi_R[i].set(is(i + 1) = j, links(i + 1) = k, elt(LHS * RHS));
+      }
+    }
+  }
+
+  // Assemble tensor moment B: for each core, combine the left and right environments
+  // element-wise over sample indices, then contract with M to restore basis indices.
+  MPS B(this->d_);
+  B.ref(1) = envi_R[0] * M[0];
+  for(unsigned core_id = 2; core_id < this->d_; ++core_id) {
+    int rankl = dim(links(core_id - 1));
+    int rankr = dim(links(core_id));
+    B.ref(core_id) = ITensor(links(core_id - 1), is(core_id), links(core_id));
+    for(int i = 1; i <= rankl; ++i) {
+      for(int j = 1; j <= rankr; ++j) {
+        for(int k = 1; k <= N; ++k) {
+          double Lelt = envi_L[core_id - 1].elt(is(core_id) = k, links(core_id - 1) = i);
+          double Relt = envi_R[core_id - 1].elt(is(core_id) = k, links(core_id) = j);
+          B.ref(core_id).set(links(core_id - 1) = i, is(core_id) = k, links(core_id) = j, Lelt * Relt);
+        }
+      }
+    }
+    B.ref(core_id) *= M[core_id - 1];
+  }
+  B.ref(this->d_) = envi_L[this->d_ - 1] * M[this->d_ - 1];
+
+  return std::make_tuple(B, envi_L, envi_R);
+}
+
+// Compute the tensor moment B and environments for the previous TT bias vb_.
+// This is the intrusive variant's contribution from the prior accumulated bias.
+// Analogous to formTensorMoment but uses vb_ in place of M*coeff: the "samples"
+// are the vb_ bond indices, and the environments are formed by contracting coeff
+// with vb_ core by core. The result has the same structure as Bemp from the new hills.
+std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorMomentVb(const MPS& coeff) {
+  // align vb_ site indices with coeff's site indices for contraction
+  for(unsigned i = 1; i <= this->d_; ++i) {
+    this->vb_.ref(i) *= delta(siteIndex(this->vb_, i), siteIndex(coeff, i));
+  }
+  std::vector<ITensor> envi_L(this->d_);
+  envi_L[1] = coeff(1) * this->vb_(1);
+  for(unsigned i = 2; i < this->d_; ++i) {
+    envi_L[i] = envi_L[i - 1] * coeff(i) * this->vb_(i);
+  }
+
+  std::vector<ITensor> envi_R(this->d_);
+  envi_R[this->d_ - 2] = coeff(this->d_) * this->vb_(this->d_);
+  for(int i = this->d_ - 3; i >= 0; --i) {
+    envi_R[i] = envi_R[i + 1] * coeff(i + 2) * this->vb_(i + 2);
+  }
+
+  MPS B(this->d_);
+  B.ref(1) = this->vb_(1) * envi_R[0];
+  for(unsigned core_id = 2; core_id < this->d_; ++core_id) {
+    B.ref(core_id) = envi_L[core_id - 1] * this->vb_(core_id) * envi_R[core_id - 1];
+  }
+  B.ref(this->d_) = envi_L[this->d_ - 1] * this->vb_(this->d_);
+
+  return std::make_tuple(B, envi_L, envi_R);
+}
+
+}
+}
+#endif // __PLUMED_HAS_LIBHDF5
+#endif // __PLUMED_HAS_LIBITENSOR

From 987084a600ddca7b597fe2787eb0db8eae432f28 Mon Sep 17 00:00:00 2001
From: strandn <nstrand@uchicago.edu>
Date: Tue, 12 May 2026 10:59:24 -0500
Subject: [PATCH 22/26] no forcing of flags

Co-authored-by: Daniele <Iximiel@users.noreply.github.com>
---
 configure.ac | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/configure.ac b/configure.ac
index 637bb9c2be..531bdf4c73 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1054,8 +1054,7 @@ if test $libitensor = true ; then
   # GCC without forcing -std=c++20 globally. When PLUMED upgrades to -std=c++20
   # everywhere, this PLUMED_CHECK_CXXFLAG call can be removed.
   PLUMED_CHECK_CXXFLAG([-fconcepts])
-  save_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -fconcepts -fopenmp"
+
   PLUMED_CHECK_CXX_PACKAGE([libitensor],[
     #include <itensor/all.h>
     int main() {
@@ -1065,10 +1064,6 @@ if test $libitensor = true ; then
       return 0;
     }
   ], [__PLUMED_HAS_LIBITENSOR], [ itensor openblas ], [true])
-  CXXFLAGS="$save_CXXFLAGS"
-  if test "$__PLUMED_HAS_LIBITENSOR" = yes ; then
-    PLUMED_CHECK_CXXFLAG([-fconcepts])
-  fi
 fi
 
 acc_found=ko

From 64def24b311ddec9ba40feffe70e8ff6b529f1f9 Mon Sep 17 00:00:00 2001
From: Nils Strand <nstrand@midway3-login3.rcc.local>
Date: Thu, 14 May 2026 18:30:52 -0500
Subject: [PATCH 23/26] fix configure plus nvhpc

---
 .github/workflows/linuxWF.yml |  2 +-
 configure                     | 52 +----------------------------------
 2 files changed, 2 insertions(+), 52 deletions(-)

diff --git a/.github/workflows/linuxWF.yml b/.github/workflows/linuxWF.yml
index 05ca69a920..38215732b4 100644
--- a/.github/workflows/linuxWF.yml
+++ b/.github/workflows/linuxWF.yml
@@ -77,7 +77,7 @@ jobs:
       # with libitensor.a (built without that flag), causing compilation errors in
       # ITensor headers and runtime corruption. The #ifdef __PLUMED_HAS_LIBITENSOR
       # guards in ttsketch ensure safe compilation when ITensor is not configured.
-      if: ${{ contains( matrix.variant, '-mpi-' ) && ! contains( matrix.variant, '-debug-' ) }}
+      if: ${{ contains( matrix.variant, '-mpi-' ) && ! contains( matrix.variant, '-debug-' ) && ! contains( matrix.variant, '-nvhpc-' ) }}
       run: |
         ITENSOR_DIR="$HOME/opt/itensor"
         HDF5_CFLAGS=$(pkg-config --cflags hdf5-serial 2>/dev/null || pkg-config --cflags hdf5 2>/dev/null || echo "-I/usr/include/hdf5/serial")
diff --git a/configure b/configure
index fed871bf96..c4935cc71b 100755
--- a/configure
+++ b/configure
@@ -10554,8 +10554,7 @@ $as_echo "no" >&6; }; CXXFLAGS="$save_CXXFLAGS"
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
-  save_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -fconcepts -fopenmp"
+
 
     found=ko
     __PLUMED_HAS_LIBITENSOR=no
@@ -10680,55 +10679,6 @@ $as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_LIBITENSOR" >&2;}
       LIBS="$save_LIBS"
     fi
 
-  CXXFLAGS="$save_CXXFLAGS"
-  if test "$__PLUMED_HAS_LIBITENSOR" = yes ; then
-
-  save_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -fconcepts"
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -fconcepts" >&5
-$as_echo_n "checking whether $CXX accepts -fconcepts... " >&6; }
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-int
-main ()
-{
-
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
-
-      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-int
-main ()
-{
-
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: not linking" >&5
-$as_echo "not linking" >&6; }; CXXFLAGS="$save_CXXFLAGS"
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }; CXXFLAGS="$save_CXXFLAGS"
-
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-
-  fi
 fi
 
 acc_found=ko

From 476c4836e1b8bfac8a3dba888ab903b16787de5d Mon Sep 17 00:00:00 2001
From: Nils Strand <nstrand@midway3-login3.rcc.local>
Date: Sun, 17 May 2026 22:13:27 -0500
Subject: [PATCH 24/26] fix configure issue plus stub class

---
 configure                | 96 +++++++++++++++++++++-------------------
 configure.ac             |  8 +++-
 src/ttsketch/TTMetaD.cpp | 89 +++++++++++++++++++++----------------
 3 files changed, 109 insertions(+), 84 deletions(-)

diff --git a/configure b/configure
index c4935cc71b..75a6fd6ee5 100755
--- a/configure
+++ b/configure
@@ -10508,53 +10508,10 @@ if test $libitensor = true ; then
   # NOTE: ITensor headers use C++20 'requires' expressions. -fconcepts enables them on
   # GCC without forcing -std=c++20 globally. When PLUMED upgrades to -std=c++20
   # everywhere, this PLUMED_CHECK_CXXFLAG call can be removed.
+  # PLUMED_CHECK_CXXFLAG([-fconcepts])
 
   save_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -fconcepts"
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -fconcepts" >&5
-$as_echo_n "checking whether $CXX accepts -fconcepts... " >&6; }
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-int
-main ()
-{
-
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
-
-      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-int
-main ()
-{
-
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: not linking" >&5
-$as_echo "not linking" >&6; }; CXXFLAGS="$save_CXXFLAGS"
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }; CXXFLAGS="$save_CXXFLAGS"
-
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-
-
+  CXXFLAGS="$CXXFLAGS -fconcepts $OPENMP_CXXFLAGS"
 
     found=ko
     __PLUMED_HAS_LIBITENSOR=no
@@ -10679,6 +10636,55 @@ $as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_LIBITENSOR" >&2;}
       LIBS="$save_LIBS"
     fi
 
+  CXXFLAGS="$save_CXXFLAGS"
+  if test "$__PLUMED_HAS_LIBITENSOR" = yes ; then
+
+  save_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -fconcepts"
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -fconcepts" >&5
+$as_echo_n "checking whether $CXX accepts -fconcepts... " >&6; }
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: not linking" >&5
+$as_echo "not linking" >&6; }; CXXFLAGS="$save_CXXFLAGS"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }; CXXFLAGS="$save_CXXFLAGS"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+  fi
 fi
 
 acc_found=ko
diff --git a/configure.ac b/configure.ac
index 531bdf4c73..1946731b94 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1053,8 +1053,10 @@ if test $libitensor = true ; then
   # NOTE: ITensor headers use C++20 'requires' expressions. -fconcepts enables them on
   # GCC without forcing -std=c++20 globally. When PLUMED upgrades to -std=c++20
   # everywhere, this PLUMED_CHECK_CXXFLAG call can be removed.
-  PLUMED_CHECK_CXXFLAG([-fconcepts])
+  # PLUMED_CHECK_CXXFLAG([-fconcepts])
 
+  save_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -fconcepts $OPENMP_CXXFLAGS"
   PLUMED_CHECK_CXX_PACKAGE([libitensor],[
     #include <itensor/all.h>
     int main() {
@@ -1064,6 +1066,10 @@ if test $libitensor = true ; then
       return 0;
     }
   ], [__PLUMED_HAS_LIBITENSOR], [ itensor openblas ], [true])
+  CXXFLAGS="$save_CXXFLAGS"
+  if test "$__PLUMED_HAS_LIBITENSOR" = yes ; then
+    PLUMED_CHECK_CXXFLAG([-fconcepts])
+  fi
 fi
 
 acc_found=ko
diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index 367eef81f3..938f9b1afc 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -6,8 +6,6 @@
  */
 
 #include "TTHelper.h"
-#ifdef __PLUMED_HAS_LIBITENSOR
-#ifdef __PLUMED_HAS_LIBHDF5
 #include "bias/Bias.h"
 #include "core/ActionRegister.h"
 #include "core/ActionSet.h"
@@ -17,9 +15,6 @@
 #include "tools/File.h"
 #include "tools/OpenMP.h"
 
-using namespace itensor;
-using namespace PLMD::bias;
-
 namespace PLMD {
 namespace ttsketch {
 
@@ -60,6 +55,24 @@ PRINT ARG=tt.bias FILE=COLVAR STRIDE=100
 */
 //+ENDPLUMEDOC
 
+#if !defined(__PLUMED_HAS_LIBITENSOR) || !defined(__PLUMED_HAS_LIBHDF5)
+
+class TTMetaD : public Bias {
+public:
+  static void registerKeywords(Keywords& keys);
+  explicit TTMetaD(const ActionOptions& ao) : Action(ao), Bias(ao) {
+    error("TTMETAD requires libitensor and libhdf5. "
+          "Reconfigure with --enable-libitensor.");
+  }
+  void calculate() override {}
+  void update() override {}
+};
+
+#else
+
+using namespace itensor;
+using namespace PLMD::bias;
+
 class TTMetaD : public Bias {
 
 private:
@@ -135,36 +148,6 @@ class TTMetaD : public Bias {
   static void registerKeywords(Keywords& keys);
 };
 
-PLUMED_REGISTER_ACTION(TTMetaD, "TTMETAD")
-
-void TTMetaD::registerKeywords(Keywords& keys) {
-  Bias::registerKeywords(keys);
-  keys.addFlag("KERNEL_BASIS", false, "Specifies that local kernel basis should be used instead of Fourier basis");
-  keys.add("compulsory", "SIGMA", "the widths of the Gaussian hills");
-  keys.add("compulsory", "PACE", "the frequency for hill addition");
-  keys.add("compulsory", "FILE", "HILLS", "a file in which the list of added hills is stored");
-  keys.add("compulsory", "HEIGHT", "the heights of the Gaussian hills");
-  keys.add("optional", "FMT", "specify format for HILLS files (useful for decrease the number of digits in regtests)");
-  keys.add("optional", "BIASFACTOR", "use well tempered metadynamics and use this bias factor. Please note you must also specify temp");
-  keys.add("optional", "TEMP", "the system temperature - this is only needed if you are doing well-tempered metadynamics");
-  keys.addFlag("WALKERS_MPI", false, "To be used when gromacs + multiple walkers are used");
-  keys.add("optional", "WALKERS_DIR", "shared directory with the hills files from all the walkers");
-  keys.use("RESTART");
-  keys.add("optional", "SKETCH_RANK", "Target rank for TTSketch algorithm - compulsory if SKETCH_CUTOFF is not specified");
-  keys.add("optional", "SKETCH_CUTOFF", "Truncation error cutoff for singular value decomposition - compulsory if SKETCH_RANK is not specified");
-  keys.add("compulsory", "SKETCH_INITRANK", "Initial rank for TTSketch algorithm");
-  keys.add("compulsory", "SKETCH_PACE", "1e6", "The frequency for TT Vbias updates");
-  keys.add("compulsory", "INTERVAL_MIN", "Lower limits, outside the limits the system will not feel the biasing force");
-  keys.add("compulsory", "INTERVAL_MAX", "Upper limits, outside the limits the system will not feel the biasing force");
-  keys.add("compulsory", "SKETCH_NBASIS", "20", "Number of basis functions per dimension");
-  keys.add("compulsory", "SKETCH_ALPHA", "0.05", "Weight coefficient for random tensor train construction");
-  keys.add("optional", "SKETCH_UNTIL", "After this time, the bias potential freezes");
-  keys.add("optional", "SKETCH_WIDTH", "Width of Gaussian kernels for smoothing");
-  keys.add("optional", "KERNEL_DX", "Width of basis function kernels");
-  keys.addFlag("NONINTRUSIVE", false, "Sketching uses previous exact sum of Gaussians instead of TT approximation");
-  keys.addFlag("DETERMINISTIC", false, "Use a fixed random seed for TT sketch construction, ensuring reproducible results. Intended for regression testing.");
-}
-
 TTMetaD::TTMetaD(const ActionOptions& ao):
   PLUMED_BIAS_INIT(ao),
   kbt_(0.0),
@@ -1126,7 +1109,37 @@ std::tuple<MPS, std::vector<ITensor>, std::vector<ITensor>> TTMetaD::formTensorM
   return std::make_tuple(B, envi_L, envi_R);
 }
 
+#endif // !defined(__PLUMED_HAS_LIBITENSOR) || !defined(__PLUMED_HAS_LIBHDF5)
+
+void TTMetaD::registerKeywords(Keywords& keys) {
+  Bias::registerKeywords(keys);
+  keys.addFlag("KERNEL_BASIS", false, "Specifies that local kernel basis should be used instead of Fourier basis");
+  keys.add("compulsory", "SIGMA", "the widths of the Gaussian hills");
+  keys.add("compulsory", "PACE", "the frequency for hill addition");
+  keys.add("compulsory", "FILE", "HILLS", "a file in which the list of added hills is stored");
+  keys.add("compulsory", "HEIGHT", "the heights of the Gaussian hills");
+  keys.add("optional", "FMT", "specify format for HILLS files (useful for decrease the number of digits in regtests)");
+  keys.add("optional", "BIASFACTOR", "use well tempered metadynamics and use this bias factor. Please note you must also specify temp");
+  keys.add("optional", "TEMP", "the system temperature - this is only needed if you are doing well-tempered metadynamics");
+  keys.addFlag("WALKERS_MPI", false, "To be used when gromacs + multiple walkers are used");
+  keys.add("optional", "WALKERS_DIR", "shared directory with the hills files from all the walkers");
+  keys.use("RESTART");
+  keys.add("optional", "SKETCH_RANK", "Target rank for TTSketch algorithm - compulsory if SKETCH_CUTOFF is not specified");
+  keys.add("optional", "SKETCH_CUTOFF", "Truncation error cutoff for singular value decomposition - compulsory if SKETCH_RANK is not specified");
+  keys.add("compulsory", "SKETCH_INITRANK", "Initial rank for TTSketch algorithm");
+  keys.add("compulsory", "SKETCH_PACE", "1e6", "The frequency for TT Vbias updates");
+  keys.add("compulsory", "INTERVAL_MIN", "Lower limits, outside the limits the system will not feel the biasing force");
+  keys.add("compulsory", "INTERVAL_MAX", "Upper limits, outside the limits the system will not feel the biasing force");
+  keys.add("compulsory", "SKETCH_NBASIS", "20", "Number of basis functions per dimension");
+  keys.add("compulsory", "SKETCH_ALPHA", "0.05", "Weight coefficient for random tensor train construction");
+  keys.add("optional", "SKETCH_UNTIL", "After this time, the bias potential freezes");
+  keys.add("optional", "SKETCH_WIDTH", "Width of Gaussian kernels for smoothing");
+  keys.add("optional", "KERNEL_DX", "Width of basis function kernels");
+  keys.addFlag("NONINTRUSIVE", false, "Sketching uses previous exact sum of Gaussians instead of TT approximation");
+  keys.addFlag("DETERMINISTIC", false, "Use a fixed random seed for TT sketch construction, ensuring reproducible results. Intended for regression testing.");
 }
-}
-#endif // __PLUMED_HAS_LIBHDF5
-#endif // __PLUMED_HAS_LIBITENSOR
+
+PLUMED_REGISTER_ACTION(TTMetaD, "TTMETAD")
+
+} // namespace ttsketch
+} // namespace PLMD

From 1289d44cd2f9cd38623e9bc7882cf3116a6e85cf Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Mon, 18 May 2026 12:20:59 -0500
Subject: [PATCH 25/26] namespace bias issue

---
 src/ttsketch/TTMetaD.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ttsketch/TTMetaD.cpp b/src/ttsketch/TTMetaD.cpp
index 938f9b1afc..6238f80133 100644
--- a/src/ttsketch/TTMetaD.cpp
+++ b/src/ttsketch/TTMetaD.cpp
@@ -55,6 +55,8 @@ PRINT ARG=tt.bias FILE=COLVAR STRIDE=100
 */
 //+ENDPLUMEDOC
 
+using namespace PLMD::bias;
+
 #if !defined(__PLUMED_HAS_LIBITENSOR) || !defined(__PLUMED_HAS_LIBHDF5)
 
 class TTMetaD : public Bias {
@@ -71,7 +73,6 @@ class TTMetaD : public Bias {
 #else
 
 using namespace itensor;
-using namespace PLMD::bias;
 
 class TTMetaD : public Bias {
 

From 91c8da98ed805d516d151eded2a15ce934892fe0 Mon Sep 17 00:00:00 2001
From: Nils Strand <nils.strand17@gmail.com>
Date: Mon, 1 Jun 2026 12:31:22 -0500
Subject: [PATCH 26/26] skip the Install step for debug builds

---
 .github/workflows/linuxWF.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/linuxWF.yml b/.github/workflows/linuxWF.yml
index 38215732b4..877dbb0adc 100644
--- a/.github/workflows/linuxWF.yml
+++ b/.github/workflows/linuxWF.yml
@@ -69,7 +69,7 @@ jobs:
         # the flags above enable the use of both libmetatomic and libtorch
         echo "PLUMED_CONFIG=$PLUMED_CONFIG --enable-libmetatomic --enable-libtorch" >> $GITHUB_ENV
     - name: Install ITensor
-      if: ${{ contains( matrix.variant, '-mpi-' ) }}
+      if: ${{ contains( matrix.variant, '-mpi-' ) && ! contains( matrix.variant, '-debug-' ) }}
       run: |
         .ci/install.itensor
     - name: Configure ITensor